import requests
from bs4 import BeautifulSoup
from shutil import copyfileobj
from urllib import request
import uuid
import xlwings as xw
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import re
import seaborn as sn
import sklearn.linear_model as lm
import statsmodels.api as sm
import geopandas as gpd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
Tout d'abord, on doit acquérir l'ensemble des fichiers Excel. Ces fichiers sont disponibles sur le site https://www.vendeeglobe.org/fr/classement. En cliquant sur l'option "copier l'adresse du lien", on remarque que cette adresse est composée d'une partie fixe et une autre partie qui change selon la date du classement. Par exemple, l'adresse du lien de téléchargement du classement du 05 Mars 2021 - 08h (UTC) est https://www.vendeeglobe.org/download-race-data/vendeeglobe_20210305_080000.xlsx.
On va définir une fonction download_excel_files dans laquelle on définit une autre fonction get_ranking_dates pour récupérer l'ensemble des dates (20210305_080000, 20210305_040000 ...) ce qui nous permettra d'avoir tous les liens de téléchargement.
def download_excel_files():
############
def get_ranking_dates():
url = 'https://www.vendeeglobe.org/fr/classement'
html_code = requests.get(url).text
soup = BeautifulSoup(html_code, 'lxml')
liste = []
for x in soup.find('div', {'class': "rankings__bar m--left"}).find_all('option')[1:-1]:
liste.append(x['value'])
return liste
############
def save_with_xlwings(file):
tempfile = file
excel_app = xw.App(visible=False)
excel_book = excel_app.books.open(file)
excel_book.save(tempfile)
excel_book.close()
excel_app.quit()
############
for i in get_ranking_dates():
url = 'https://www.vendeeglobe.org/download-race-data/vendeeglobe_'+str(i)+'.xlsx'
filename = 'Excel_files/vendeeglobe_'+str(i)+'.xlsx'
with request.urlopen(url) as response, open(filename, 'wb') as out_file:
copyfileobj(response, out_file)
save_with_xlwings('Excel_files/vendeeglobe_'+str(i)+'.xlsx')
# download_excel_files()
Ensuite, on définit une fonction get_boats_infos qui nous permet d'acquérir les caractéristiques de chaque voile.
def get_boats_infos():
url = 'https://www.vendeeglobe.org/fr/glossaire'
html_code = requests.get(url).text
soup = BeautifulSoup(html_code, 'lxml')
df = pd.DataFrame(columns=["Nom du skipper",
"Nom du voile",
"Numéro de voile",
"Anciens noms du bateau",
"Architecte",
"Chantier",
"Date de lancement",
"Longueur",
"Largeur",
"Tirant d'eau",
"Déplacement (poids)",
"Nombre de dérives",
"Hauteur mât",
"Voile quille",
"Surface de voiles au près",
"Surface de voiles au portant"])
for element in soup.find_all('div', {'class': "sv-u-1 sv-u-s-1-2 sv-u-m-1-3 sv-u-l-1-6"}) :
skipper = element.find('span', {'class': "boats-list__skipper-name"}).text
bateau = element.find('h3', {'class': "boats-list__boat-name"}).text
new_row = {'Nom du skipper': skipper, 'Nom du voile': bateau}
if bateau == '':
new_row.pop('Nom du voile')
for x in element.find('ul', {'class': 'boats-list__popup-specs-list'}).find_all('li'):
new_row[x.text.split(' : ')[0]] = x.text.split(' : ')[1]
new_df = pd.DataFrame([new_row])
df = pd.concat([df, new_df], axis=0, ignore_index=True)
df = df.set_index('Nom du skipper')
return df
get_boats_infos()
| Nom du voile | Numéro de voile | Anciens noms du bateau | Architecte | Chantier | Date de lancement | Longueur | Largeur | Tirant d'eau | Déplacement (poids) | Nombre de dérives | Hauteur mât | Voile quille | Surface de voiles au près | Surface de voiles au portant | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Nom du skipper | |||||||||||||||
| Fabrice AMEDEO | NEWREST - ART & FENÊTRES | FRA 56 | No Way Back, Vento di Sardegna | VPLP/Verdier | Persico Marine | 01 Août 2015 | 18,28 m | 5,85 m | 4,50 m | 7 t | foils | 29 m | monotype | 320 m2 | 570 m2 |
| Romain ATTANASIO | PURE - Best Western® | FRA 49 | Gitana Eighty, Synerciel, Newrest-Matmut | Bruce Farr Design | Southern Ocean Marine (Nouvelle Zélande) | 08 Mars 2007 | 18,28m | 5,80m | 4,50m | 9t | 2 | 28m | acier forgé | 280 m2 | 560 m2 |
| Alexia BARRIER | TSE - 4MYPLANET | FRA72 | Famille Mary-Etamine du Lys, Initiatives Coeur... | Marc Lombard | MAG France | 01 Mars 1998 | 18,28m | 5,54m | 4,50m | 9t | 2 | 29 m | acier | 260 m2 | 580 m2 |
| Yannick BESTAVEN | Maître CoQ IV | 17 | Safran 2 - Des Voiles et Vous | Verdier - VPLP | CDK Technologies | 12 Mars 2015 | 18,28 m | 5,80 m | 4,50 m | 8 t | foils | 29 m | acier mécano soudé | 310 m2 | 550 m2 |
| Jérémie BEYOU | CHARAL | 08 | NaN | VPLP | CDK Technologies | 18 Août 2018 | 18,28 m | 5,85 m | 4,50 m | 8t | foils | 29 m | acier | 320 m2 | 600 m2 |
| Arnaud BOISSIÈRES | LA MIE CÂLINE - ARTISANS ARTIPÔLE | FRA 14 | Ecover3, Président, Gamesa, Kilcullen Voyager-... | Owen Clarke Design LLP - Clay Oliver | Hakes Marine - Mer Agitée | 03 Août 2007 | 18,28 m | 5,65 m | 4,50 m | 7,9 tonnes | foils | 29 m | basculante avec vérin | 300 m² | 610 m² |
| Louis BURTON | BUREAU VALLEE 2 | 18 | Banque Populaire VIII | Verdier - VPLP | CDK Technologies | 09 Juin 2015 | 18,28 m | 5,80 m | 4,50 m | 7,6 t | foils | 28 m | acier | 300 m2 | 600 m2 |
| Didac COSTA | ONE PLANET ONE OCEAN | ESP 33 | Kingfisher - Educacion sin Fronteras - Forum M... | Owen Clarke Design | Martens Yachts | 02 Février 2000 | 18,28 m | 5,30 m | 4,50 m | 8,9 t | 2 | 26 m | acier | 240 m2 | 470 m2 |
| Manuel COUSIN | GROUPE SÉTIN | FRA 71 | Paprec-Virbac2, Estrella Damm, We are Water, L... | Bruce Farr Yacht Design | Southern Ocean Marine (Nouvelle-Zélande) | 02 Février 2007 | 18,28 m | 5,80 m | 4,50 m | 9 t | 2 asymétriques | 28,50 | basculante sur vérin hydraulique | 270 m2 | 560 m2 |
| Clarisse CREMER | BANQUE POPULAIRE X | FRA30 | Macif - SMA | Verdier - VPLP | CDK - Mer Agitée | 01 Mars 2011 | 18,28 m | 5,70 m | 4,5 m | 7,7 t | 2 | 29 m | acier forgé | 340 m2 | 570 m2 |
| Charlie DALIN | APIVIA | FRA 79 | NaN | Verdier | CDK technologies - MerConcept | 05 Août 2019 | 18,28 m | 5,85 m | 4,50 m | 8 t | foils | 29 m | acier | 350 m2 | 560 m2 |
| Sam DAVIES | INITIATIVES-COEUR | FRA109 | Maître CoQ - Banque Populaire VII- Foncia II | VPLP - Verdier | CDK Technologies | 20 Septembre 2010 | 18,28 m | 5,70 m | 4,50 m | 7,8 t | foils | 27 m | acier forgé | 300 m2 | 600 m2 |
| Sébastien DESTREMAU | MERCI | 69 | Foresight Natural Energy, Maisonneuve | Lavanos | Artech do Brasil | 15 Janvier 2005 | 18,28 m | 5,60 m | 4,50 m | 8,5 t | 2 | 29 m | acier | 250 m2 | 650 m2 |
| Benjamin DUTREUX | OMIA - WATER FAMILY | FRA09 | Spirit of Yukoh, Neutrogena, Hugo Boss, Estrel... | Bruce Farr Design | Offshore Challenge - Cowes | 03 Juillet 2007 | 18,28 m | 5,85 m | 4,50 m | 8 t | 2 | 29 m | acier forgé | 300 m2 | 700 m2 |
| Kevin ESCOFFIER | PRB | FRA 85 | NaN | Verdier - VPLP | CDK Technologies | 08 Mars 2010 | 18,28 m | 5,50 m | 4,50 m | NC | foils | 27,40 m | Acier mécano soudé | 300 m2 | 600 m2 |
| Clément GIRAUD | Compagnie du Lit / Jiliti | FRA83 | Delta Dore, Bureau Vallée, Vers un Monde sans ... | Bruce Farr design | JMV Cherbourg | 26 Juillet 2006 | 18,28 m | 5,75 m | 4,50 m | 8,5 t | 2 | 29 m | acier forgé | 300 m2 | 620 m2 |
| François Guiffant | NaN | NaN | NaN | NaN | NaN | 01 Janvier 1970 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| Pip HARE | MEDALLIA | GBR77 | Armor Lux, We Are Water, La Fabrique | Pierre Rolland | Bernard Stamm | 03 Juillet 1999 | 18,28 m | 5,70 m | 4,50 m | 9 t | 2 | 29 m | carbone | 300 m2 | 580 m2 |
| Boris HERRMANN | SEAEXPLORER - YACHT CLUB DE MONACO | 16 | Edmond de Rothschild, Malizia - Yacht Club de ... | Verdier - VPLP | Multiplast | 07 Août 2015 | 18,28 m | 5,70 m | 4,50 m | 7,6 t | foils | 29 m | acier | 290 m2 | 490 m2 |
| Ari HUUSELA | STARK | FIN222 | Aviva, GAES Centros Auditivos | Owen Clarke Design | Hakes Marine - Wellington (Nouvelle-Zélande) | 06 Août 2007 | 18,28 m | 5,80 m | 4,50 m | 8,5 t | 2 | 28 m | acier | 270 m2 | 580 m2 |
| Isabelle JOSCHKE | MACSF | FRA 27 | Quéguiner, Safran | Verdier - VPLP | Chantier Naval de Larros | 04 Juillet 2007 | 18,28 m | 5,60 m | 4,50 m | 7,7 t | foils | 27 m | acier | 300 m2 | 650 m2 |
| Jean LE CAM | Yes We Cam! | 001 | Cheminées Poujoulat, Mare, Maître CoQ, Mapfre,... | Bruce Farr design | CDK Technologies | 03 Janvier 2007 | 18,28 m | 5,90 m | 4,50 m | 8 t | 2 | 28 m | acier forgé | 300 m2 | 620 m2 |
| Stéphane LE DIRAISON | TIME FOR OCEANS | FRA 92 | Hugo Boss, Energa, Compagnie du Lit-Boulogne B... | Finot-Conq Design | Neville Hutton | 01 Juin 2007 | 18,28 m | 5,84 m | 4,50 m | 8,5 t | foils | 28 m | monotype | 300 m2 | 550 m2 |
| Miranda MERRON | CAMPAGNE DE FRANCE | FRA50 | Great America IV, Mirabaud, Temenos | Owen Clarke | Southern Ocean Marine, Tauranga | 04 Mai 2006 | 18,28 m | 5,50 m | 4,50 m | 8,5 t | 2 | 28 m | carbone | 330 m2 | 600 m2 |
| Giancarlo PEDOTE | PRYSMIAN GROUP | ITA 34 | Saint-Michel - Virbac | VPLP - Verdier | Multiplast | 02 Avril 2015 | 18,28 m | 5,80 m | 4,5 m | 8 t | foils | 29 m | acier forgé | 300 m2 | 600 m2 |
| Alan ROURA | LA FABRIQUE | SUI07 | Brit Air, Votre Nom autour du Monde, MACSF | Groupe Finot-Conq | Multiplast | 01 Août 2007 | 18,28 m | 5,90 m | 4,50 m | nc | foils | 27 m | carbone | 290 m2 | 580 m2 |
| Thomas RUYANT | LinkedOut | NaN | NaN | Verdier | Persico | 03 Septembre 2019 | 18,28 m | 5,85 m | 4,50 m | 8 t | foils | 29 m | acier forgé | 350 m2 | 560 m2 |
| Damien SEGUIN | GROUPE APICIL | FRA1000 | Comme Un Seul Homme, DCNS | Groupe Finot-Conq | Multiplast | 10 Août 2008 | 18,28 m | 5,85 m | 4,50 m | 8,5 t | 2 | 29 m | acier forgé | 350 m2 | 610 m2 |
| Kojiro SHIRAISHI | DMG MORI Global One | JPN 11 | NaN | VPLP | Multiplast | 05 Septembre 2019 | 18,28 m | 5,85 m | 4,50 m | 8 t | foils | 29 m | acier forgé | 320 m2 | 580 m2 |
| Sébastien SIMON | ARKEA PAPREC | 4 | NaN | Juan Kouyoumdjian | CDK Technologies / Assemblage | 19 Juillet 2019 | 18,28 m | 5,70 m | 4,50 m | 8 t | foiler | 29 m | Inox usiné | 260 m2 | 600 m2 |
| Maxime SOREL | V and B-MAYENNE | FRA53 | Le Souffle du Nord, Groupe Bel | Verdier - VPLP | Indiana Yachting (Scarlino, Italie) | 07 Septembre 2007 | 18,28 m | 5,50 m | 4,50 m | 7,7 t | 2 | 29 m | acier | 365 m2 | 700 m2 |
| Alex THOMSON | HUGO BOSS | GBR 99 | NaN | VPLP - Alex Thomson Racing (led by Pete Hobson) | Carrington Boats | 15 Août 2019 | 18,28 m | 5,40 m | 4,50 m | 7,7 t | foils | 29 m | acier forgé | 330 m2 | 630 m2 |
| Armel TRIPON | L'OCCITANE EN PROVENCE | 2 | NaN | Samuel Manuard | Black Pepper© | 31 Janvier 2020 | 18,28 m | 5,50 m | 4,50 m | 7,8 t | foils | 28 m | acier forgé | 270 m2 | 535 m2 |
| Nicolas TROUSSEL | CORUM L'EPARGNE | FRA 6 | NaN | Juan Kouyoumdjian | CDK technologies - Mer Agitée | 15 Mai 2020 | 18,28 m | 5,70 m | 4,50 m | 7,9 t | foils | 27,30 m | NaN | 270 m2 | 535 m2 |
On prend l'exemple du classement du 22 Novembre 2022 - 08h (UTC) :
pd.read_excel('Excel_files/vendeeglobe_20201122_080000.xlsx')
| Unnamed: 0 | Unnamed: 1 | Unnamed: 2 | Unnamed: 3 | Unnamed: 4 | Unnamed: 5 | Unnamed: 6 | Unnamed: 7 | Unnamed: 8 | Unnamed: 9 | ... | Unnamed: 11 | Unnamed: 12 | Unnamed: 13 | Unnamed: 14 | Unnamed: 15 | Unnamed: 16 | Unnamed: 17 | Unnamed: 18 | Unnamed: 19 | Unnamed: 20 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1 | NaN | Classement du dimanche 22 novembre 2020 à 09h0... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2 | NaN | Rang\nRank | Nat. / Voile\nNat. / Sail | Skipper / Bateau\nSkipper / crew | NaN | NaN | NaN | Depuis 30 minutes\nSince 30 minutes | NaN | NaN | ... | Depuis le dernier classement\nSince the the la... | NaN | NaN | NaN | Depuis 24 heures\nSince 24 hours | NaN | NaN | NaN | DTF | DTL |
| 3 | NaN | NaN | NaN | NaN | Heure FR\nHour FR | Latitude\nLatitude | Longitude\nLongitude | Cap\nHeading | Vitesse\nSpeed | VMG\nVMG | ... | Cap\nHeading | Vitesse\nSpeed | VMG\nVMG | Distance\nDistance | Cap\nHeading | Vitesse\nSpeed | VMG\nVMG | Distance\nDistance | NaN | NaN |
| 4 | NaN | 1 | \nFRA 59 | Thomas Ruyant\nLinkedOut | 08:30 FR\n | 25°08.92'S | 24°41.04'W | 149° | 9.0 kts | 9.0 kts | ... | 133° | 10.6 kts | 10.4 kts | 42.5 nm | 145° | 14.8 kts | 14.8 kts | 356.3 nm | 19766.8 nm | 0.0 nm |
| 5 | NaN | 2 | \nFRA 79 | Charlie Dalin\nAPIVIA | 08:30 FR\n | 25°13.83'S | 25°26.26'W | 139° | 16.5 kts | 16.5 kts | ... | 141° | 15.0 kts | 15.0 kts | 60.0 nm | 142° | 17.3 kts | 17.3 kts | 415.5 nm | 19786.7 nm | 20.0 nm |
| 6 | NaN | 3 | \nGBR 99 | Alex Thomson\nHUGO BOSS | 08:30 FR\n | 24°45.75'S | 26°51.00'W | 143° | 5.8 kts | 5.8 kts | ... | 135° | 5.7 kts | 5.7 kts | 22.9 nm | 143° | 12.3 kts | 12.3 kts | 294.7 nm | 19858.4 nm | 91.7 nm |
| 7 | NaN | 4 | \nFRA 01 | Jean Le Cam\nYes we Cam ! | 08:30 FR\n | 20°58.86'S | 26°12.37'W | 157° | 14.8 kts | 14.7 kts | ... | 152° | 14.2 kts | 14.2 kts | 56.8 nm | 153° | 15.3 kts | 15.3 kts | 366.5 nm | 20026.1 nm | 259.3 nm |
| 8 | NaN | 5 | \nFRA 85 | Kevin Escoffier\nPRB | 08:30 FR\n | 22°03.06'S | 29°07.32'W | 153° | 14.6 kts | 14.2 kts | ... | 146° | 15.2 kts | 15.2 kts | 61.0 nm | 159° | 15.9 kts | 15.6 kts | 381.7 nm | 20063.2 nm | 296.4 nm |
| 9 | NaN | 6 | \nMON 10 | Boris Herrmann\nSeaexplorer - Yacht Club De Mo... | 08:30 FR\n | 21°20.60'S | 29°24.42'W | 164° | 16.8 kts | 15.4 kts | ... | 159° | 15.1 kts | 14.4 kts | 60.2 nm | 164° | 17.2 kts | 16.7 kts | 412.7 nm | 20105.9 nm | 339.1 nm |
| 10 | NaN | 7 | \nFRA 17 | Yannick Bestaven\nMaître Coq IV | 08:30 FR\n | 20°34.37'S | 28°45.60'W | 159° | 15.0 kts | 14.5 kts | ... | 153° | 13.8 kts | 13.7 kts | 55.3 nm | 162° | 16.7 kts | 16.4 kts | 399.6 nm | 20120.8 nm | 354.0 nm |
| 11 | NaN | 8 | \nFRA 18 | Louis Burton\nBureau Vallée 2 | 08:30 FR\n | 21°37.47'S | 30°31.47'W | 156° | 15.7 kts | 14.8 kts | ... | 153° | 13.9 kts | 13.4 kts | 55.6 nm | 172° | 15.1 kts | 13.8 kts | 362.0 nm | 20134.7 nm | 368.0 nm |
| 12 | NaN | 9 | \nFRA 4 | Sébastien Simon\nARKEA PAPREC | 08:30 FR\n | 21°21.42'S | 30°27.29'W | 148° | 14.1 kts | 13.9 kts | ... | 148° | 14.1 kts | 13.9 kts | 56.4 nm | 162° | 17.0 kts | 16.4 kts | 407.7 nm | 20143.8 nm | 377.0 nm |
| 13 | NaN | 10 | \nFRA 109 | Samantha Davies\nInitiatives - Coeur | 08:30 FR\n | 20°28.35'S | 30°06.93'W | 173° | 18.4 kts | 15.6 kts | ... | 165° | 14.7 kts | 13.5 kts | 58.7 nm | 166° | 16.2 kts | 15.6 kts | 389.2 nm | 20171.5 nm | 404.8 nm |
| 14 | NaN | 11 | \nFRA 09 | Benjamin Dutreux\nOMIA - Water Family | 08:30 FR\n | 15°47.71'S | 29°13.79'W | 178° | 14.0 kts | 12.7 kts | ... | 176° | 13.2 kts | 12.3 kts | 52.8 nm | 169° | 13.3 kts | 13.0 kts | 319.9 nm | 20381.6 nm | 614.8 nm |
| 15 | NaN | 12 | \nFRA 1000 | Damien Seguin\nGroupe APICIL | 08:30 FR\n | 15°59.59'S | 30°26.68'W | 162° | 12.7 kts | 12.4 kts | ... | 166° | 13.7 kts | 13.2 kts | 54.6 nm | 173° | 14.9 kts | 14.3 kts | 357.6 nm | 20404.4 nm | 637.7 nm |
| 16 | NaN | 13 | \nITA 34 | Giancarlo Pedote\nPrysmian Group | 08:30 FR\n | 16°24.85'S | 31°45.70'W | 172° | 18.0 kts | 16.2 kts | ... | 175° | 16.6 kts | 14.7 kts | 66.3 nm | 177° | 16.7 kts | 15.4 kts | 400.6 nm | 20423.3 nm | 656.6 nm |
| 17 | NaN | 14 | \nFRA 53 | Maxime Sorel\nV And B Mayenne | 08:30 FR\n | 12°17.25'S | 30°02.95'W | 186° | 13.7 kts | 11.9 kts | ... | 181° | 14.4 kts | 13.2 kts | 57.6 nm | 176° | 14.3 kts | 13.8 kts | 342.9 nm | 20592.4 nm | 825.7 nm |
| 18 | NaN | 15 | \nFRA 27 | Isabelle Joschke\nMACSF | 08:30 FR\n | 11°22.76'S | 30°23.04'W | 192° | 16.4 kts | 13.3 kts | ... | 182° | 18.6 kts | 16.9 kts | 74.6 nm | 180° | 16.9 kts | 16.1 kts | 405.9 nm | 20650.3 nm | 883.5 nm |
| 19 | NaN | 16 | \nFRA 30 | Clarisse Cremer\nBanque Populaire X | 08:30 FR\n | 09°21.58'S | 30°07.41'W | 173° | 14.3 kts | 13.9 kts | ... | 172° | 13.6 kts | 13.3 kts | 54.6 nm | 179° | 13.7 kts | 13.2 kts | 329.6 nm | 20756.5 nm | 989.7 nm |
| 20 | NaN | 17 | \nFRA 49 | Romain Attanasio\nPure - Best Western Hotels a... | 08:30 FR\n | 09°17.23'S | 30°12.79'W | 172° | 14.1 kts | 13.8 kts | ... | 175° | 13.7 kts | 13.2 kts | 54.9 nm | 180° | 13.5 kts | 12.9 kts | 324.6 nm | 20762.5 nm | 995.7 nm |
| 21 | NaN | 18 | \nSUI 7 | Alan Roura\nLa Fabrique | 08:30 FR\n | 08°32.10'S | 30°30.38'W | 178° | 15.8 kts | 14.9 kts | ... | 181° | 15.0 kts | 14.0 kts | 60.0 nm | 187° | 13.8 kts | 12.7 kts | 332.2 nm | 20810.9 nm | 1044.1 nm |
| 22 | NaN | 19 | \nFRA 92 | Stéphane Le Diraison\nTime For Oceans | 08:30 FR\n | 03°50.79'S | 30°35.44'W | 174° | 14.7 kts | 14.4 kts | ... | 178° | 13.5 kts | 13.0 kts | 54.2 nm | 188° | 11.9 kts | 10.9 kts | 285.1 nm | 21077.7 nm | 1311.0 nm |
| 23 | NaN | 20 | \nESP 33 | Didac Costa\nOne Planet One Ocean | 08:30 FR\n | 04°32.29'N | 28°02.12'W | 189° | 8.3 kts | 7.8 kts | ... | 187° | 7.0 kts | 6.7 kts | 27.9 nm | 198° | 5.6 kts | 5.0 kts | 133.5 nm | 21528.5 nm | 1761.7 nm |
| 24 | NaN | 21 | \nGBR 777 | Pip Hare\nMedallia | 08:30 FR\n | 04°22.29'N | 28°59.05'W | 176° | 7.5 kts | 7.4 kts | ... | 182° | 8.0 kts | 7.7 kts | 31.9 nm | 183° | 5.5 kts | 5.3 kts | 132.2 nm | 21529.9 nm | 1763.1 nm |
| 25 | NaN | 22 | \nFRA 14 | Arnaud Boissieres\nLa Mie Câline - Artisans Ar... | 08:30 FR\n | 04°30.19'N | 28°55.89'W | 198° | 7.3 kts | 6.4 kts | ... | 202° | 6.5 kts | 5.5 kts | 26.1 nm | 185° | 5.9 kts | 5.7 kts | 141.3 nm | 21536.9 nm | 1770.2 nm |
| 26 | NaN | 23 | \nFRA 71 | Manuel Cousin\nGroupe Sétin | 08:30 FR\n | 05°11.10'N | 28°41.65'W | 190° | 9.0 kts | 8.3 kts | ... | 205° | 6.8 kts | 5.6 kts | 27.3 nm | 196° | 5.7 kts | 5.1 kts | 136.5 nm | 21574.2 nm | 1807.4 nm |
| 27 | NaN | 24 | FR\nFRA 02 | Armel Tripon\nL'Occitane en Provence | 08:30 FR\n | 07°21.39'N | 29°14.05'W | 193° | 9.2 kts | 8.4 kts | ... | 183° | 11.9 kts | 11.5 kts | 47.6 nm | 180° | 11.0 kts | 10.8 kts | 264.4 nm | 21708.3 nm | 1941.6 nm |
| 28 | NaN | 25 | \nFRA 72 | Alexia Barrier\nTSE - 4myplanet | 08:30 FR\n | 09°39.12'N | 27°22.07'W | 171° | 10.4 kts | 10.4 kts | ... | 171° | 10.2 kts | 10.2 kts | 40.6 nm | 176° | 9.4 kts | 9.4 kts | 226.3 nm | 21824.7 nm | 2057.9 nm |
| 29 | NaN | 26 | \nFRA 83 | Clément Giraud\nCompagnie du lit - Jiliti | 08:30 FR\n | 09°52.55'N | 27°52.70'W | 186° | 12.2 kts | 11.8 kts | ... | 185° | 11.8 kts | 11.4 kts | 47.0 nm | 184° | 10.5 kts | 10.2 kts | 251.1 nm | 21842.6 nm | 2075.9 nm |
| 30 | NaN | 27 | \nFRA 50 | Miranda Merron\nCampagne de France | 08:30 FR\n | 09°51.00'N | 28°14.96'W | 188° | 9.3 kts | 8.9 kts | ... | 189° | 9.8 kts | 9.3 kts | 39.1 nm | 189° | 9.0 kts | 8.5 kts | 215.0 nm | 21844.7 nm | 2077.9 nm |
| 31 | NaN | 28 | \nFIN 222 | Ari Huusela\nStark | 08:30 FR\n | 10°18.94'N | 27°40.74'W | 177° | 9.9 kts | 9.8 kts | ... | 175° | 9.7 kts | 9.7 kts | 38.8 nm | 190° | 8.3 kts | 7.9 kts | 198.0 nm | 21866.9 nm | 2100.1 nm |
| 32 | NaN | 29 | FR\nFRA 69 | Sébastien Destremau\nMerci | 08:30 FR\n | 10°19.79'N | 27°58.86'W | 173° | 9.3 kts | 9.3 kts | ... | 183° | 9.6 kts | 9.3 kts | 38.3 nm | 189° | 9.1 kts | 8.7 kts | 218.5 nm | 21870.5 nm | 2103.8 nm |
| 33 | NaN | 30 | \nFRA 56 | Fabrice Amedeo\nNewrest - Art et Fenetres | 08:30 FR\n | 10°10.32'N | 29°07.14'W | 187° | 10.7 kts | 10.2 kts | ... | 182° | 9.9 kts | 9.7 kts | 39.6 nm | 181° | 10.8 kts | 10.6 kts | 259.6 nm | 21872.9 nm | 2106.1 nm |
| 34 | NaN | 31 | \nJPN 11 | Kojiro Shiraishi\nDMG MORI Global One | 08:30 FR\n | 14°54.36'N | 29°06.92'W | 165° | 10.5 kts | 10.4 kts | ... | 176° | 9.6 kts | 9.6 kts | 38.5 nm | 244° | 7.4 kts | 2.5 kts | 177.2 nm | 22152.5 nm | 2385.7 nm |
| 35 | NaN | 32 | \nFRA 8 | Jérémie Beyou\nCharal | 08:30 FR\n | 27°28.17'N | 19°37.29'W | 178° | 17.4 kts | 16.9 kts | ... | 184° | 13.7 kts | 13.6 kts | 54.8 nm | 190° | 14.1 kts | 14.1 kts | 338.5 nm | 22909.6 nm | 3142.8 nm |
| 36 | NaN | RET | \nFRA 6 | Nicolas Troussel\nCORUM L'Épargne | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 37 | NaN | Traitements et calculs : Géovoile, un service ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 38 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 39 | NaN | VMG : Velocity Made Good = projection du vecte... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 40 | NaN | DTF : Distance To Finish = Distance théorique ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
41 rows × 21 columns
def prepare_excel_files(date):
df = pd.read_excel('Excel_files/vendeeglobe_'+date+'.xlsx')
i = df.loc[df['Unnamed: 1']=='Traitements et calculs : Géovoile, un service Hauwell Studios'].index[0]
df = df.iloc[4:i,1:].reset_index(drop=True)
c = df.columns.tolist()
df = df[c[0:6]+[c[6]]+[c[10]]+[c[14]]+[c[7]]+[c[11]]+[c[15]]+[c[8]]+[c[12]]+[c[16]]+[c[9]]+[c[13]]+[c[17]]+c[18:]]
cols = [('Rang',''),
('Nat/Voile',''),
('Skipper/Bateau',''),
('Heure (UTC)',''),
('Latitude',''),
('Longitude',''),
('Cap (°)','Depuis 30 minutes'),
('Cap (°)','Depuis le dernier classement'),
('Cap (°)','Depuis 24 heures'),
('Vitesse (kts)','Depuis 30 minutes'),
('Vitesse (kts)','Depuis le dernier classement'),
('Vitesse (kts)','Depuis 24 heures'),
('VMG (kts)','Depuis 30 minutes'),
('VMG (kts)','Depuis le dernier classement'),
('VMG (kts)','Depuis 24 heures'),
('Distance (nm)','Depuis 30 minutes'),
('Distance (nm)','Depuis le dernier classement'),
('Distance (nm)','Depuis 24 heures'),
('DTF (nm)',''),
('DTL (nm)','')]
df.columns = pd.MultiIndex.from_tuples(cols)
# Nationalité, numéro et nom du voile, nom du skipper
nat_voile = df['Nat/Voile'].str.split("\n").str[1].str.split(" ")
skipper_bateau = df['Skipper/Bateau'].str.split("\n")
skipper = skipper_bateau.apply(lambda x: x[0]).str.split(" ", n=1)
df['Nationalité'] = nat_voile.apply(lambda x: x[0])
df['Numéro du voile'] = nat_voile.apply(lambda x: x[1])
df['Numéro du voile'] = df['Numéro du voile'].str.zfill(2)
df['Nom du skipper'] = skipper.apply(lambda x: x[0].capitalize()+" "+x[1].upper())
df['Nom du voile'] = skipper_bateau.apply(lambda x: x[1])
c = df.columns.tolist()
df = df[[c[0]]+ c[-4:] + c[3:-4]]
# Heure (on choisit les heures UTC et non pas FR)
df['Heure (UTC)'] = pd.to_datetime(df['Heure (UTC)'].str.split(" ").str[0], format='%H:%M')
df['Heure (UTC)'] = df['Heure (UTC)'].apply(lambda x: x - datetime.timedelta(hours=1))
df['Heure (UTC)'] = df['Heure (UTC)'].dt.time
# Latitude et Longitude : conversion degrés, minutes, secondes => décimal
def get_latitude(lat_degree):
liste = re.split("°|\.|'", lat_degree)
if liste[3] == 'N':
lat_decimal = int(liste[0]) + int(liste[1])/60 + int(liste[2])/3600
else:
lat_decimal = -(int(liste[0])+ int(liste[1])/60 + int(liste[2])/3600)
return lat_decimal
def get_longitude(lon_degree):
liste = re.split("°|\.|'", lon_degree)
if liste[3] == 'E':
lon_decimal = int(liste[0]) + int(liste[1])/60 + int(liste[2])/3600
else:
lon_decimal = -(int(liste[0])+ int(liste[1])/60 + int(liste[2])/3600)
return lon_decimal
df['Latitude'] = df['Latitude'].apply(lambda x: get_latitude(x) if pd.notnull(x) else x)
df['Longitude'] = df['Longitude'].apply(lambda x: get_longitude(x) if pd.notnull(x) else x)
# Cap (°), Vitesse (kts), VMG (kts), Distance (nm)
df['Cap (°)'] = df['Cap (°)'].apply(lambda x: x.str.rstrip('°')).apply(pd.to_numeric)
df['Vitesse (kts)'] = df['Vitesse (kts)'].apply(lambda x: x.str.rstrip(" kts")).apply(pd.to_numeric)
df['VMG (kts)'] = df['VMG (kts)'].apply(lambda x: x.str.rstrip(" kts")).apply(pd.to_numeric)
df['Distance (nm)'] = df['Distance (nm)'].apply(lambda x: x.str.rstrip(" nm")).apply(pd.to_numeric)
# DTF (nm) et DTL (nm)
df['DTF (nm)'] = pd.to_numeric(df['DTF (nm)'].str.rstrip(" nm"))
df['DTL (nm)'] = pd.to_numeric(df['DTL (nm)'].str.rstrip(" nm"))
return df
prepare_excel_files('20201122_080000')
| Rang | Nationalité | Numéro du voile | Nom du skipper | Nom du voile | Heure (UTC) | Latitude | Longitude | Cap (°) | ... | Vitesse (kts) | VMG (kts) | Distance (nm) | DTF (nm) | DTL (nm) | |||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Depuis 30 minutes | Depuis le dernier classement | ... | Depuis le dernier classement | Depuis 24 heures | Depuis 30 minutes | Depuis le dernier classement | Depuis 24 heures | Depuis 30 minutes | Depuis le dernier classement | Depuis 24 heures | |||||||||||
| 0 | 1 | FRA | 59 | Thomas RUYANT | LinkedOut | 07:30:00 | -25.158889 | -24.684444 | 149.0 | 133.0 | ... | 10.6 | 14.8 | 9.0 | 10.4 | 14.8 | 4.5 | 42.5 | 356.3 | 19766.8 | 0.0 |
| 1 | 2 | FRA | 79 | Charlie DALIN | APIVIA | 07:30:00 | -25.239722 | -25.440556 | 139.0 | 141.0 | ... | 15.0 | 17.3 | 16.5 | 15.0 | 17.3 | 8.3 | 60.0 | 415.5 | 19786.7 | 20.0 |
| 2 | 3 | GBR | 99 | Alex THOMSON | HUGO BOSS | 07:30:00 | -24.770833 | -26.850000 | 143.0 | 135.0 | ... | 5.7 | 12.3 | 5.8 | 5.7 | 12.3 | 2.9 | 22.9 | 294.7 | 19858.4 | 91.7 |
| 3 | 4 | FRA | 01 | Jean LE CAM | Yes we Cam ! | 07:30:00 | -20.990556 | -26.210278 | 157.0 | 152.0 | ... | 14.2 | 15.3 | 14.7 | 14.2 | 15.3 | 7.4 | 56.8 | 366.5 | 20026.1 | 259.3 |
| 4 | 5 | FRA | 85 | Kevin ESCOFFIER | PRB | 07:30:00 | -22.051667 | -29.125556 | 153.0 | 146.0 | ... | 15.2 | 15.9 | 14.2 | 15.2 | 15.6 | 7.3 | 61.0 | 381.7 | 20063.2 | 296.4 |
| 5 | 6 | MON | 10 | Boris HERRMANN | Seaexplorer - Yacht Club De Monaco | 07:30:00 | -21.350000 | -29.411667 | 164.0 | 159.0 | ... | 15.1 | 17.2 | 15.4 | 14.4 | 16.7 | 8.4 | 60.2 | 412.7 | 20105.9 | 339.1 |
| 6 | 7 | FRA | 17 | Yannick BESTAVEN | Maître Coq IV | 07:30:00 | -20.576944 | -28.766667 | 159.0 | 153.0 | ... | 13.8 | 16.7 | 14.5 | 13.7 | 16.4 | 7.5 | 55.3 | 399.6 | 20120.8 | 354.0 |
| 7 | 8 | FRA | 18 | Louis BURTON | Bureau Vallée 2 | 07:30:00 | -21.629722 | -30.529722 | 156.0 | 153.0 | ... | 13.9 | 15.1 | 14.8 | 13.4 | 13.8 | 7.9 | 55.6 | 362.0 | 20134.7 | 368.0 |
| 8 | 9 | FRA | 04 | Sébastien SIMON | ARKEA PAPREC | 07:30:00 | -21.361667 | -30.458056 | 148.0 | 148.0 | ... | 14.1 | 17.0 | 13.9 | 13.9 | 16.4 | 7.1 | 56.4 | 407.7 | 20143.8 | 377.0 |
| 9 | 10 | FRA | 109 | Samantha DAVIES | Initiatives - Coeur | 07:30:00 | -20.476389 | -30.125833 | 173.0 | 165.0 | ... | 14.7 | 16.2 | 15.6 | 13.5 | 15.6 | 9.2 | 58.7 | 389.2 | 20171.5 | 404.8 |
| 10 | 11 | FRA | 09 | Benjamin DUTREUX | OMIA - Water Family | 07:30:00 | -15.803056 | -29.238611 | 178.0 | 176.0 | ... | 13.2 | 13.3 | 12.7 | 12.3 | 13.0 | 7.0 | 52.8 | 319.9 | 20381.6 | 614.8 |
| 11 | 12 | FRA | 1000 | Damien SEGUIN | Groupe APICIL | 07:30:00 | -15.999722 | -30.452222 | 162.0 | 166.0 | ... | 13.7 | 14.9 | 12.4 | 13.2 | 14.3 | 6.4 | 54.6 | 357.6 | 20404.4 | 637.7 |
| 12 | 13 | ITA | 34 | Giancarlo PEDOTE | Prysmian Group | 07:30:00 | -16.423611 | -31.769444 | 172.0 | 175.0 | ... | 16.6 | 16.7 | 16.2 | 14.7 | 15.4 | 9.0 | 66.3 | 400.6 | 20423.3 | 656.6 |
| 13 | 14 | FRA | 53 | Maxime SOREL | V And B Mayenne | 07:30:00 | -12.290278 | -30.059722 | 186.0 | 181.0 | ... | 14.4 | 14.3 | 11.9 | 13.2 | 13.8 | 6.8 | 57.6 | 342.9 | 20592.4 | 825.7 |
| 14 | 15 | FRA | 27 | Isabelle JOSCHKE | MACSF | 07:30:00 | -11.387778 | -30.384444 | 192.0 | 182.0 | ... | 18.6 | 16.9 | 13.3 | 16.9 | 16.1 | 8.2 | 74.6 | 405.9 | 20650.3 | 883.5 |
| 15 | 16 | FRA | 30 | Clarisse CREMER | Banque Populaire X | 07:30:00 | -9.366111 | -30.128056 | 173.0 | 172.0 | ... | 13.6 | 13.7 | 13.9 | 13.3 | 13.2 | 7.2 | 54.6 | 329.6 | 20756.5 | 989.7 |
| 16 | 17 | FRA | 49 | Romain ATTANASIO | Pure - Best Western Hotels and Resorts | 07:30:00 | -9.289722 | -30.221944 | 172.0 | 175.0 | ... | 13.7 | 13.5 | 13.8 | 13.2 | 12.9 | 7.1 | 54.9 | 324.6 | 20762.5 | 995.7 |
| 17 | 18 | SUI | 07 | Alan ROURA | La Fabrique | 07:30:00 | -8.536111 | -30.510556 | 178.0 | 181.0 | ... | 15.0 | 13.8 | 14.9 | 14.0 | 12.7 | 7.9 | 60.0 | 332.2 | 20810.9 | 1044.1 |
| 18 | 19 | FRA | 92 | Stéphane LE DIRAISON | Time For Oceans | 07:30:00 | -3.855278 | -30.595556 | 174.0 | 178.0 | ... | 13.5 | 11.9 | 14.4 | 13.0 | 10.9 | 7.4 | 54.2 | 285.1 | 21077.7 | 1311.0 |
| 19 | 20 | ESP | 33 | Didac COSTA | One Planet One Ocean | 07:30:00 | 4.541389 | -28.036667 | 189.0 | 187.0 | ... | 7.0 | 5.6 | 7.8 | 6.7 | 5.0 | 4.2 | 27.9 | 133.5 | 21528.5 | 1761.7 |
| 20 | 21 | GBR | 777 | Pip HARE | Medallia | 07:30:00 | 4.374722 | -28.984722 | 176.0 | 182.0 | ... | 8.0 | 5.5 | 7.4 | 7.7 | 5.3 | 3.7 | 31.9 | 132.2 | 21529.9 | 1763.1 |
| 21 | 22 | FRA | 14 | Arnaud BOISSIERES | La Mie Câline - Artisans Artipôle | 07:30:00 | 4.505278 | -28.941389 | 198.0 | 202.0 | ... | 6.5 | 5.9 | 6.4 | 5.5 | 5.7 | 3.7 | 26.1 | 141.3 | 21536.9 | 1770.2 |
| 22 | 23 | FRA | 71 | Manuel COUSIN | Groupe Sétin | 07:30:00 | 5.186111 | -28.701389 | 190.0 | 205.0 | ... | 6.8 | 5.7 | 8.3 | 5.6 | 5.1 | 4.5 | 27.3 | 136.5 | 21574.2 | 1807.4 |
| 23 | 24 | FRA | 02 | Armel TRIPON | L'Occitane en Provence | 07:30:00 | 7.360833 | -29.234722 | 193.0 | 183.0 | ... | 11.9 | 11.0 | 8.4 | 11.5 | 10.8 | 4.6 | 47.6 | 264.4 | 21708.3 | 1941.6 |
| 24 | 25 | FRA | 72 | Alexia BARRIER | TSE - 4myplanet | 07:30:00 | 9.653333 | -27.368611 | 171.0 | 171.0 | ... | 10.2 | 9.4 | 10.4 | 10.2 | 9.4 | 5.2 | 40.6 | 226.3 | 21824.7 | 2057.9 |
| 25 | 26 | FRA | 83 | Clément GIRAUD | Compagnie du lit - Jiliti | 07:30:00 | 9.881944 | -27.886111 | 186.0 | 185.0 | ... | 11.8 | 10.5 | 11.8 | 11.4 | 10.2 | 6.1 | 47.0 | 251.1 | 21842.6 | 2075.9 |
| 26 | 27 | FRA | 50 | Miranda MERRON | Campagne de France | 07:30:00 | 9.850000 | -28.260000 | 188.0 | 189.0 | ... | 9.8 | 9.0 | 8.9 | 9.3 | 8.5 | 4.6 | 39.1 | 215.0 | 21844.7 | 2077.9 |
| 27 | 28 | FIN | 222 | Ari HUUSELA | Stark | 07:30:00 | 10.326111 | -27.687222 | 177.0 | 175.0 | ... | 9.7 | 8.3 | 9.8 | 9.7 | 7.9 | 4.9 | 38.8 | 198.0 | 21866.9 | 2100.1 |
| 28 | 29 | FRA | 69 | Sébastien DESTREMAU | Merci | 07:30:00 | 10.338611 | -27.990556 | 173.0 | 183.0 | ... | 9.6 | 9.1 | 9.3 | 9.3 | 8.7 | 4.7 | 38.3 | 218.5 | 21870.5 | 2103.8 |
| 29 | 30 | FRA | 56 | Fabrice AMEDEO | Newrest - Art et Fenetres | 07:30:00 | 10.175556 | -29.120556 | 187.0 | 182.0 | ... | 9.9 | 10.8 | 10.2 | 9.7 | 10.6 | 5.4 | 39.6 | 259.6 | 21872.9 | 2106.1 |
| 30 | 31 | JPN | 11 | Kojiro SHIRAISHI | DMG MORI Global One | 07:30:00 | 14.910000 | -29.125556 | 165.0 | 176.0 | ... | 9.6 | 7.4 | 10.4 | 9.6 | 2.5 | 5.2 | 38.5 | 177.2 | 22152.5 | 2385.7 |
| 31 | 32 | FRA | 08 | Jérémie BEYOU | Charal | 07:30:00 | 27.471389 | -19.624722 | 178.0 | 184.0 | ... | 13.7 | 14.1 | 16.9 | 13.6 | 14.1 | 8.7 | 54.8 | 338.5 | 22909.6 | 3142.8 |
| 32 | RET | FRA | 06 | Nicolas TROUSSEL | CORUM L'Épargne | NaT | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
33 rows × 22 columns
prepare_excel_files('20201122_080000').dtypes
Rang object
Nationalité object
Numéro du voile object
Nom du skipper object
Nom du voile object
Heure (UTC) object
Latitude float64
Longitude float64
Cap (°) Depuis 30 minutes float64
Depuis le dernier classement float64
Depuis 24 heures float64
Vitesse (kts) Depuis 30 minutes float64
Depuis le dernier classement float64
Depuis 24 heures float64
VMG (kts) Depuis 30 minutes float64
Depuis le dernier classement float64
Depuis 24 heures float64
Distance (nm) Depuis 30 minutes float64
Depuis le dernier classement float64
Depuis 24 heures float64
DTF (nm) float64
DTL (nm) float64
dtype: object
Cependant, ce programme n'est valable que pour certains fichiers puisque vers la fin de la course, le format des fichiers Excel change avec les arrivées des voiliers. On va donc s'arrêter juste avant. Le nouveau format commence dès le classement du 27 Janvier 2021 à 17h (UTC). Nous allons nous contenter des classements sortis avant cette date, c'est-à-dire jusqu'à celui du 27 Janvier 2021 à 14h (UTC).
def get_ranking_dates():
url = 'https://www.vendeeglobe.org/fr/classement'
html_code = requests.get(url).text
soup = BeautifulSoup(html_code, 'lxml')
liste = []
for x in soup.find('div', {'class': "rankings__bar m--left"}).find_all('option')[1:-1]:
liste.append(x['value'])
return liste
liste_dates = list(reversed(get_ranking_dates()))
liste_dates = liste_dates[:liste_dates.index('20210127_170000')]
liste_dataframes = []
for d in liste_dates:
df = prepare_excel_files(d)
df['Date du classement'] = pd.to_datetime(d, format="%Y%m%d_%H%M%S")
df.set_index('Date du classement', append=True, inplace=True)
df = df.swaplevel()
liste_dataframes.append(df)
print(d)
20201108_140000 20201108_150000 20201108_170000 20201108_210000 20201109_040000 20201109_080000 20201109_110000 20201109_140000 20201109_170000 20201109_210000 20201110_040000 20201110_080000 20201110_110000 20201110_140000 20201110_170000 20201110_210000 20201111_040000 20201111_080000 20201111_110000 20201111_140000 20201111_170000 20201111_210000 20201112_040000 20201112_080000 20201112_110000 20201112_140000 20201112_170000 20201112_210000 20201113_040000 20201113_080000 20201113_110000 20201113_140000 20201113_170000 20201113_210000 20201114_040000 20201114_080000 20201114_110000 20201114_140000 20201114_170000 20201114_210000 20201115_040000 20201115_080000 20201115_110000 20201115_140000 20201115_170000 20201115_210000 20201116_040000 20201116_080000 20201116_110000 20201116_140000 20201116_170000 20201116_210000 20201117_040000 20201117_080000 20201117_110000 20201117_140000 20201117_170000 20201117_210000 20201118_040000 20201118_080000 20201118_110000 20201118_140000 20201118_170000 20201118_210000 20201119_040000 20201119_080000 20201119_110000 20201119_140000 20201119_170000 20201119_210000 20201120_040000 20201120_080000 20201120_110000 20201120_140000 20201120_170000 20201120_210000 20201121_040000 20201121_080000 20201121_110000 20201121_140000 20201121_170000 20201121_210000 20201122_040000 20201122_080000 20201122_110000 20201122_140000 20201122_170000 20201122_210000 20201123_040000 20201123_080000 20201123_110000 20201123_140000 20201123_170000 20201123_210000 20201124_040000 20201124_080000 20201124_110000 20201124_140000 20201124_170000 20201124_210000 20201125_040000 20201125_080000 20201125_110000 20201125_140000 20201125_170000 20201125_210000 20201126_040000 20201126_080000 20201126_110000 20201126_140000 20201126_170000 20201126_210000 20201127_040000 20201127_080000 20201127_110000 20201127_140000 20201127_170000 20201127_210000 20201128_040000 20201128_080000 20201128_110000 20201128_140000 20201128_170000 20201128_210000 20201129_040000 20201129_080000 20201129_110000 20201129_140000 20201129_170000 20201129_210000 20201130_040000 20201130_080000 20201130_110000 20201130_140000 20201130_170000 20201130_210000 20201201_040000 20201201_080000 20201201_110000 20201201_140000 20201201_170000 20201201_210000 20201202_040000 20201202_080000 20201202_110000 20201202_140000 20201202_170000 20201202_210000 20201203_040000 20201203_080000 20201203_110000 20201203_140000 20201203_170000 20201203_210000 20201204_040000 20201204_080000 20201204_110000 20201204_140000 20201204_170000 20201204_210000 20201205_040000 20201205_080000 20201205_110000 20201205_140000 20201205_170000 20201205_210000 20201206_040000 20201206_080000 20201206_110000 20201206_140000 20201206_170000 20201206_210000 20201207_040000 20201207_080000 20201207_110000 20201207_140000 20201207_170000 20201207_210000 20201208_040000 20201208_080000 20201208_110000 20201208_140000 20201208_170000 20201208_210000 20201209_040000 20201209_080000 20201209_110000 20201209_140000 20201209_170000 20201209_210000 20201210_040000 20201210_080000 20201210_110000 20201210_140000 20201210_170000 20201210_210000 20201211_040000 20201211_080000 20201211_110000 20201211_140000 20201211_170000 20201211_210000 20201212_040000 20201212_080000 20201212_110000 20201212_140000 20201212_170000 20201212_210000 20201213_040000 20201213_080000 20201213_110000 20201213_140000 20201213_170000 20201213_210000 20201213_210000 20201214_040000 20201214_080000 20201214_110000 20201214_140000 20201214_170000 20201214_210000 20201215_040000 20201215_080000 20201215_110000 20201215_140000 20201215_170000 20201215_210000 20201216_040000 20201216_080000 20201216_110000 20201216_140000 20201216_170000 20201216_210000 20201217_040000 20201217_080000 20201217_110000 20201217_140000 20201217_170000 20201217_210000 20201218_040000 20201218_080000 20201218_110000 20201218_140000 20201218_170000 20201218_210000 20201219_040000 20201219_080000 20201219_110000 20201219_140000 20201219_170000 20201219_210000 20201220_040000 20201220_080000 20201220_110000 20201220_140000 20201220_170000 20201220_210000 20201221_040000 20201221_080000 20201221_110000 20201221_140000 20201221_170000 20201221_210000 20201222_040000 20201222_080000 20201222_110000 20201222_140000 20201222_170000 20201222_210000 20201223_040000 20201223_080000 20201223_110000 20201223_140000 20201223_170000 20201223_210000 20201224_040000 20201224_080000 20201224_110000 20201224_140000 20201224_170000 20201224_210000 20201225_040000 20201225_080000 20201225_110000 20201225_140000 20201225_170000 20201225_210000 20201226_040000 20201226_080000 20201226_110000 20201226_140000 20201226_170000 20201226_210000 20201227_040000 20201227_080000 20201227_110000 20201227_140000 20201227_170000 20201227_210000 20201228_040000 20201228_080000 20201228_110000 20201228_140000 20201228_170000 20201228_210000 20201229_040000 20201229_080000 20201229_110000 20201229_140000 20201229_170000 20201229_210000 20201230_040000 20201230_080000 20201230_110000 20201230_110000 20201230_140000 20201230_170000 20201230_210000 20201231_040000 20201231_080000 20201231_110000 20201231_140000 20201231_170000 20201231_210000 20210101_040000 20210101_080000 20210101_110000 20210101_140000 20210101_170000 20210101_210000 20210102_040000 20210102_080000 20210102_110000 20210102_140000 20210102_170000 20210102_210000 20210103_040000 20210103_080000 20210103_110000 20210103_140000 20210103_170000 20210103_210000 20210104_040000 20210104_080000 20210104_110000 20210104_140000 20210104_170000 20210104_210000 20210105_040000 20210105_080000 20210105_110000 20210105_140000 20210105_170000 20210105_210000 20210106_040000 20210106_080000 20210106_110000 20210106_140000 20210106_170000 20210106_210000 20210107_040000 20210107_080000 20210107_110000 20210107_140000 20210107_170000 20210107_210000 20210108_040000 20210108_080000 20210108_110000 20210108_140000 20210108_170000 20210108_210000 20210109_040000 20210109_040000 20210109_080000 20210109_110000 20210109_140000 20210109_170000 20210109_210000 20210110_040000 20210110_080000 20210110_110000 20210110_140000 20210110_170000 20210110_210000 20210111_040000 20210111_080000 20210111_110000 20210111_140000 20210111_170000 20210111_210000 20210112_040000 20210112_080000 20210112_110000 20210112_140000 20210112_170000 20210112_210000 20210113_040000 20210113_080000 20210113_110000 20210113_140000 20210113_170000 20210113_210000 20210114_040000 20210114_080000 20210114_110000 20210114_140000 20210114_170000 20210114_210000 20210115_040000 20210115_080000 20210115_110000 20210115_140000 20210115_170000 20210115_210000 20210116_040000 20210116_080000 20210116_110000 20210116_140000 20210116_170000 20210116_210000 20210117_040000 20210117_080000 20210117_110000 20210117_140000 20210117_170000 20210117_210000 20210118_040000 20210118_080000 20210118_110000 20210118_140000 20210118_170000 20210118_210000 20210119_040000 20210119_080000 20210119_110000 20210119_140000 20210119_170000 20210119_210000 20210120_040000 20210120_080000 20210120_110000 20210120_140000 20210120_170000 20210120_210000 20210121_040000 20210121_080000 20210121_110000 20210121_140000 20210121_170000 20210121_210000 20210122_040000 20210122_080000 20210122_110000 20210122_140000 20210122_170000 20210122_210000 20210123_040000 20210123_080000 20210123_110000 20210123_140000 20210123_170000 20210123_210000 20210124_040000 20210124_080000 20210124_110000 20210124_140000 20210124_170000 20210124_210000 20210125_040000 20210125_080000 20210125_110000 20210125_140000 20210125_170000 20210125_210000 20210126_040000 20210126_080000 20210126_110000 20210126_140000 20210126_170000 20210126_210000 20210127_040000 20210127_080000 20210127_110000 20210127_140000
len(liste_dates) == len(liste_dataframes)
True
df_classements = pd.concat(liste_dataframes)
df_classements
| Rang | Nationalité | Numéro du voile | Nom du skipper | Nom du voile | Heure (UTC) | Latitude | Longitude | Cap (°) | ... | Vitesse (kts) | VMG (kts) | Distance (nm) | DTF (nm) | DTL (nm) | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Depuis 30 minutes | Depuis le dernier classement | ... | Depuis le dernier classement | Depuis 24 heures | Depuis 30 minutes | Depuis le dernier classement | Depuis 24 heures | Depuis 30 minutes | Depuis le dernier classement | Depuis 24 heures | ||||||||||||
| Date du classement | ||||||||||||||||||||||
| 2020-11-08 14:00:00 | 0 | 1 | FRA | 18 | Louis BURTON | Bureau Vallée 2 | 14:30:00 | 46.412778 | -1.846667 | 241.0 | 357.0 | ... | 0.0 | 0.3 | 17.5 | 0.0 | 0.3 | 0.3 | 2788.0 | 6.1 | 24293.9 | 0.0 |
| 1 | 2 | MON | 10 | Boris HERRMANN | Seaexplorer - Yacht Club De Monaco | 14:31:00 | 46.409444 | -1.839444 | 241.0 | 357.0 | ... | 0.0 | 0.3 | 10.9 | 0.0 | 0.2 | 0.4 | 2787.9 | 6.0 | 24294.2 | 0.4 | |
| 2 | 3 | FRA | 08 | Jérémie BEYOU | Charal | 14:30:00 | 46.425278 | -1.844167 | 244.0 | 357.0 | ... | 0.0 | 0.2 | 15.5 | 0.0 | 0.2 | 0.5 | 2788.5 | 5.5 | 24294.3 | 0.5 | |
| 3 | 4 | FRA | 59 | Thomas RUYANT | LinkedOut | 14:30:00 | 46.419722 | -1.835556 | 244.0 | 357.0 | ... | 0.0 | 0.2 | 13.1 | 0.0 | 0.2 | 0.7 | 2788.3 | 5.6 | 24294.5 | 0.6 | |
| 4 | 5 | FRA | 53 | Maxime SOREL | V And B Mayenne | 14:30:00 | 46.416389 | -1.832222 | 246.0 | 357.0 | ... | 0.0 | 0.8 | 10.9 | 0.0 | 0.7 | 0.2 | 2788.1 | 5.8 | 24294.5 | 0.6 | |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2021-01-27 14:00:00 | 28 | RET | FRA | 109 | Samantha DAVIES | Initiatives - Coeur | NaT | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 29 | RET | FRA | 04 | Sébastien SIMON | ARKEA PAPREC | NaT | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
| 30 | RET | GBR | 99 | Alex THOMSON | HUGO BOSS | NaT | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
| 31 | RET | FRA | 85 | Kevin ESCOFFIER | PRB | NaT | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
| 32 | RET | FRA | 06 | Nicolas TROUSSEL | CORUM L'Épargne | NaT | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
16005 rows × 22 columns
def prepare_boats_infos():
# Noms des skippers
df = get_boats_infos().reset_index()
nom = df['Nom du skipper'].str.split(" ", n=1)
df['Nom du skipper'] = nom.apply(lambda x: x[0].capitalize()+" "+x[1].upper())
df = df.set_index('Nom du skipper')
# Nationalité - Numéro de voile
df['Nat/Num'] = ['FRA 56', 'FRA 49', 'FRA 72', 'FRA 17', 'FRA 08', 'FRA 14', 'FRA 18', 'ESP 33', 'FRA 71',
'FRA 30', 'FRA 79', 'FRA 109', 'FRA 69', 'FRA 09', 'FRA 85', 'FRA 83', np.nan, 'GBR 777',
'MON 10', 'FIN 222', 'FRA 27', 'FRA 01', 'FRA 92', 'FRA 50', 'ITA 34', 'SUI 07', 'FRA 59',
'FRA 1000', 'JPN 11', 'FRA 04', 'FRA 53', 'GBR 99', 'FRA 02', 'FRA 06']
df['Nationalité'] = df['Nat/Num'].str.split(" ").str[0]
df['Numéro du voile'] = df['Nat/Num'].str.split(" ").str[1]
df = df.drop('Numéro de voile', axis=1)
df = df.drop('Nat/Num', axis=1)
# Date de lancement
Mois_francais = ['Janvier', 'Février', 'Mars', 'Avril', 'Mai', 'Juin', 'Juillet', 'Août', 'Septembre']
Mois_anglais = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September']
for i in range(9):
df['Date de lancement'] = df['Date de lancement'].str.replace(Mois_francais[i], Mois_anglais[i])
df['Date de lancement'] = pd.to_datetime(df['Date de lancement'], format='%d %B %Y')
# Longueur, Largeur, Tirant d'eau, Déplacement (poids), Hauteur mât, Surface de voiles au près, Surface de voiles au portant
df['Longueur'] = df['Longueur'].apply(lambda x: re.findall(r"\d+,?\d*", x)[0] if pd.notnull(x) else x)
df['Largeur'] = df['Largeur'].apply(lambda x: re.findall(r"\d+,?\d*", x)[0] if pd.notnull(x) else x)
df["Tirant d'eau"] = df["Tirant d'eau"].apply(lambda x: re.findall(r"\d+,?\d*", x)[0] if pd.notnull(x) else x)
df.loc[df['Déplacement (poids)'].isin(['NC', 'nc']), 'Déplacement (poids)'] = np.nan
df['Déplacement (poids)'] = df['Déplacement (poids)'].apply(lambda x: re.findall(r"\d+,?\d*", x)[0] if pd.notnull(x) else x)
df['Hauteur mât'] = df['Hauteur mât'].apply(lambda x: re.findall(r"\d+,?\d*", x)[0] if pd.notnull(x) else x)
df['Surface de voiles au près'] = df['Surface de voiles au près'].apply(lambda x: re.findall(r"\d+,?\d*", x)[0] if pd.notnull(x) else x)
df['Surface de voiles au portant'] = df['Surface de voiles au portant'].apply(lambda x: re.findall(r"\d+,?\d*", x)[0] if pd.notnull(x) else x)
df['Longueur'] = df['Longueur'].str.replace(',', '.')
df['Largeur'] = df['Largeur'].str.replace(',', '.')
df["Tirant d'eau"] = df["Tirant d'eau"].str.replace(',', '.')
df['Déplacement (poids)'] = df['Déplacement (poids)'].str.replace(',', '.')
df['Hauteur mât'] = df['Hauteur mât'].str.replace(',', '.')
df['Longueur'] = df['Longueur'].str.replace(',', '.')
df['Longueur'] = pd.to_numeric(df['Longueur'])
df['Largeur'] = pd.to_numeric(df['Largeur'])
df["Tirant d'eau"] = pd.to_numeric(df["Tirant d'eau"])
df['Déplacement (poids)'] = pd.to_numeric(df['Déplacement (poids)'])
df['Hauteur mât'] = pd.to_numeric(df['Hauteur mât'])
df['Surface de voiles au près'] = pd.to_numeric(df['Surface de voiles au près'])
df['Surface de voiles au portant'] = pd.to_numeric(df['Surface de voiles au portant'])
# Ordre des colonnes
cols = df.columns.tolist()
df = df[[cols[0]] + cols[-2:] + cols[1:-2]]
return df
df_boats_infos = prepare_boats_infos()
df_boats_infos
| Nom du voile | Nationalité | Numéro du voile | Anciens noms du bateau | Architecte | Chantier | Date de lancement | Longueur | Largeur | Tirant d'eau | Déplacement (poids) | Nombre de dérives | Hauteur mât | Voile quille | Surface de voiles au près | Surface de voiles au portant | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Nom du skipper | ||||||||||||||||
| Fabrice AMEDEO | NEWREST - ART & FENÊTRES | FRA | 56 | No Way Back, Vento di Sardegna | VPLP/Verdier | Persico Marine | 2015-08-01 | 18.28 | 5.85 | 4.5 | 7.0 | foils | 29.0 | monotype | 320.0 | 570.0 |
| Romain ATTANASIO | PURE - Best Western® | FRA | 49 | Gitana Eighty, Synerciel, Newrest-Matmut | Bruce Farr Design | Southern Ocean Marine (Nouvelle Zélande) | 2007-03-08 | 18.28 | 5.80 | 4.5 | 9.0 | 2 | 28.0 | acier forgé | 280.0 | 560.0 |
| Alexia BARRIER | TSE - 4MYPLANET | FRA | 72 | Famille Mary-Etamine du Lys, Initiatives Coeur... | Marc Lombard | MAG France | 1998-03-01 | 18.28 | 5.54 | 4.5 | 9.0 | 2 | 29.0 | acier | 260.0 | 580.0 |
| Yannick BESTAVEN | Maître CoQ IV | FRA | 17 | Safran 2 - Des Voiles et Vous | Verdier - VPLP | CDK Technologies | 2015-03-12 | 18.28 | 5.80 | 4.5 | 8.0 | foils | 29.0 | acier mécano soudé | 310.0 | 550.0 |
| Jérémie BEYOU | CHARAL | FRA | 08 | NaN | VPLP | CDK Technologies | 2018-08-18 | 18.28 | 5.85 | 4.5 | 8.0 | foils | 29.0 | acier | 320.0 | 600.0 |
| Arnaud BOISSIÈRES | LA MIE CÂLINE - ARTISANS ARTIPÔLE | FRA | 14 | Ecover3, Président, Gamesa, Kilcullen Voyager-... | Owen Clarke Design LLP - Clay Oliver | Hakes Marine - Mer Agitée | 2007-08-03 | 18.28 | 5.65 | 4.5 | 7.9 | foils | 29.0 | basculante avec vérin | 300.0 | 610.0 |
| Louis BURTON | BUREAU VALLEE 2 | FRA | 18 | Banque Populaire VIII | Verdier - VPLP | CDK Technologies | 2015-06-09 | 18.28 | 5.80 | 4.5 | 7.6 | foils | 28.0 | acier | 300.0 | 600.0 |
| Didac COSTA | ONE PLANET ONE OCEAN | ESP | 33 | Kingfisher - Educacion sin Fronteras - Forum M... | Owen Clarke Design | Martens Yachts | 2000-02-02 | 18.28 | 5.30 | 4.5 | 8.9 | 2 | 26.0 | acier | 240.0 | 470.0 |
| Manuel COUSIN | GROUPE SÉTIN | FRA | 71 | Paprec-Virbac2, Estrella Damm, We are Water, L... | Bruce Farr Yacht Design | Southern Ocean Marine (Nouvelle-Zélande) | 2007-02-02 | 18.28 | 5.80 | 4.5 | 9.0 | 2 asymétriques | 28.5 | basculante sur vérin hydraulique | 270.0 | 560.0 |
| Clarisse CREMER | BANQUE POPULAIRE X | FRA | 30 | Macif - SMA | Verdier - VPLP | CDK - Mer Agitée | 2011-03-01 | 18.28 | 5.70 | 4.5 | 7.7 | 2 | 29.0 | acier forgé | 340.0 | 570.0 |
| Charlie DALIN | APIVIA | FRA | 79 | NaN | Verdier | CDK technologies - MerConcept | 2019-08-05 | 18.28 | 5.85 | 4.5 | 8.0 | foils | 29.0 | acier | 350.0 | 560.0 |
| Sam DAVIES | INITIATIVES-COEUR | FRA | 109 | Maître CoQ - Banque Populaire VII- Foncia II | VPLP - Verdier | CDK Technologies | 2010-09-20 | 18.28 | 5.70 | 4.5 | 7.8 | foils | 27.0 | acier forgé | 300.0 | 600.0 |
| Sébastien DESTREMAU | MERCI | FRA | 69 | Foresight Natural Energy, Maisonneuve | Lavanos | Artech do Brasil | 2005-01-15 | 18.28 | 5.60 | 4.5 | 8.5 | 2 | 29.0 | acier | 250.0 | 650.0 |
| Benjamin DUTREUX | OMIA - WATER FAMILY | FRA | 09 | Spirit of Yukoh, Neutrogena, Hugo Boss, Estrel... | Bruce Farr Design | Offshore Challenge - Cowes | 2007-07-03 | 18.28 | 5.85 | 4.5 | 8.0 | 2 | 29.0 | acier forgé | 300.0 | 700.0 |
| Kevin ESCOFFIER | PRB | FRA | 85 | NaN | Verdier - VPLP | CDK Technologies | 2010-03-08 | 18.28 | 5.50 | 4.5 | NaN | foils | 27.4 | Acier mécano soudé | 300.0 | 600.0 |
| Clément GIRAUD | Compagnie du Lit / Jiliti | FRA | 83 | Delta Dore, Bureau Vallée, Vers un Monde sans ... | Bruce Farr design | JMV Cherbourg | 2006-07-26 | 18.28 | 5.75 | 4.5 | 8.5 | 2 | 29.0 | acier forgé | 300.0 | 620.0 |
| François GUIFFANT | NaN | NaN | NaN | NaN | NaN | NaN | 1970-01-01 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| Pip HARE | MEDALLIA | GBR | 777 | Armor Lux, We Are Water, La Fabrique | Pierre Rolland | Bernard Stamm | 1999-07-03 | 18.28 | 5.70 | 4.5 | 9.0 | 2 | 29.0 | carbone | 300.0 | 580.0 |
| Boris HERRMANN | SEAEXPLORER - YACHT CLUB DE MONACO | MON | 10 | Edmond de Rothschild, Malizia - Yacht Club de ... | Verdier - VPLP | Multiplast | 2015-08-07 | 18.28 | 5.70 | 4.5 | 7.6 | foils | 29.0 | acier | 290.0 | 490.0 |
| Ari HUUSELA | STARK | FIN | 222 | Aviva, GAES Centros Auditivos | Owen Clarke Design | Hakes Marine - Wellington (Nouvelle-Zélande) | 2007-08-06 | 18.28 | 5.80 | 4.5 | 8.5 | 2 | 28.0 | acier | 270.0 | 580.0 |
| Isabelle JOSCHKE | MACSF | FRA | 27 | Quéguiner, Safran | Verdier - VPLP | Chantier Naval de Larros | 2007-07-04 | 18.28 | 5.60 | 4.5 | 7.7 | foils | 27.0 | acier | 300.0 | 650.0 |
| Jean LE CAM | Yes We Cam! | FRA | 01 | Cheminées Poujoulat, Mare, Maître CoQ, Mapfre,... | Bruce Farr design | CDK Technologies | 2007-01-03 | 18.28 | 5.90 | 4.5 | 8.0 | 2 | 28.0 | acier forgé | 300.0 | 620.0 |
| Stéphane LE DIRAISON | TIME FOR OCEANS | FRA | 92 | Hugo Boss, Energa, Compagnie du Lit-Boulogne B... | Finot-Conq Design | Neville Hutton | 2007-06-01 | 18.28 | 5.84 | 4.5 | 8.5 | foils | 28.0 | monotype | 300.0 | 550.0 |
| Miranda MERRON | CAMPAGNE DE FRANCE | FRA | 50 | Great America IV, Mirabaud, Temenos | Owen Clarke | Southern Ocean Marine, Tauranga | 2006-05-04 | 18.28 | 5.50 | 4.5 | 8.5 | 2 | 28.0 | carbone | 330.0 | 600.0 |
| Giancarlo PEDOTE | PRYSMIAN GROUP | ITA | 34 | Saint-Michel - Virbac | VPLP - Verdier | Multiplast | 2015-04-02 | 18.28 | 5.80 | 4.5 | 8.0 | foils | 29.0 | acier forgé | 300.0 | 600.0 |
| Alan ROURA | LA FABRIQUE | SUI | 07 | Brit Air, Votre Nom autour du Monde, MACSF | Groupe Finot-Conq | Multiplast | 2007-08-01 | 18.28 | 5.90 | 4.5 | NaN | foils | 27.0 | carbone | 290.0 | 580.0 |
| Thomas RUYANT | LinkedOut | FRA | 59 | NaN | Verdier | Persico | 2019-09-03 | 18.28 | 5.85 | 4.5 | 8.0 | foils | 29.0 | acier forgé | 350.0 | 560.0 |
| Damien SEGUIN | GROUPE APICIL | FRA | 1000 | Comme Un Seul Homme, DCNS | Groupe Finot-Conq | Multiplast | 2008-08-10 | 18.28 | 5.85 | 4.5 | 8.5 | 2 | 29.0 | acier forgé | 350.0 | 610.0 |
| Kojiro SHIRAISHI | DMG MORI Global One | JPN | 11 | NaN | VPLP | Multiplast | 2019-09-05 | 18.28 | 5.85 | 4.5 | 8.0 | foils | 29.0 | acier forgé | 320.0 | 580.0 |
| Sébastien SIMON | ARKEA PAPREC | FRA | 04 | NaN | Juan Kouyoumdjian | CDK Technologies / Assemblage | 2019-07-19 | 18.28 | 5.70 | 4.5 | 8.0 | foiler | 29.0 | Inox usiné | 260.0 | 600.0 |
| Maxime SOREL | V and B-MAYENNE | FRA | 53 | Le Souffle du Nord, Groupe Bel | Verdier - VPLP | Indiana Yachting (Scarlino, Italie) | 2007-09-07 | 18.28 | 5.50 | 4.5 | 7.7 | 2 | 29.0 | acier | 365.0 | 700.0 |
| Alex THOMSON | HUGO BOSS | GBR | 99 | NaN | VPLP - Alex Thomson Racing (led by Pete Hobson) | Carrington Boats | 2019-08-15 | 18.28 | 5.40 | 4.5 | 7.7 | foils | 29.0 | acier forgé | 330.0 | 630.0 |
| Armel TRIPON | L'OCCITANE EN PROVENCE | FRA | 02 | NaN | Samuel Manuard | Black Pepper© | 2020-01-31 | 18.28 | 5.50 | 4.5 | 7.8 | foils | 28.0 | acier forgé | 270.0 | 535.0 |
| Nicolas TROUSSEL | CORUM L'EPARGNE | FRA | 06 | NaN | Juan Kouyoumdjian | CDK technologies - Mer Agitée | 2020-05-15 | 18.28 | 5.70 | 4.5 | 7.9 | foils | 27.3 | NaN | 270.0 | 535.0 |
cols = df_classements.columns.tolist()
pd.DataFrame({
'Colonne':
cols,
'Nombre de valeurs manquantes':
[df_classements[col].isnull().sum() for col in cols],
'Pourcentage de valeurs manquantes':
[round(((df_classements[col].isnull().sum())*100/(len(df_classements.index))),2).astype(str)+' %' for col in cols]
})
| Colonne | Nombre de valeurs manquantes | Pourcentage de valeurs manquantes | |
|---|---|---|---|
| 0 | (Rang, ) | 0 | 0.0 % |
| 1 | (Nationalité, ) | 0 | 0.0 % |
| 2 | (Numéro du voile, ) | 0 | 0.0 % |
| 3 | (Nom du skipper, ) | 0 | 0.0 % |
| 4 | (Nom du voile, ) | 0 | 0.0 % |
| 5 | (Heure (UTC), ) | 2221 | 13.88 % |
| 6 | (Latitude, ) | 2221 | 13.88 % |
| 7 | (Longitude, ) | 2221 | 13.88 % |
| 8 | (Cap (°), Depuis 30 minutes) | 2221 | 13.88 % |
| 9 | (Cap (°), Depuis le dernier classement) | 2221 | 13.88 % |
| 10 | (Cap (°), Depuis 24 heures) | 2221 | 13.88 % |
| 11 | (Vitesse (kts), Depuis 30 minutes) | 2221 | 13.88 % |
| 12 | (Vitesse (kts), Depuis le dernier classement) | 2221 | 13.88 % |
| 13 | (Vitesse (kts), Depuis 24 heures) | 2221 | 13.88 % |
| 14 | (VMG (kts), Depuis 30 minutes) | 2221 | 13.88 % |
| 15 | (VMG (kts), Depuis le dernier classement) | 2221 | 13.88 % |
| 16 | (VMG (kts), Depuis 24 heures) | 2221 | 13.88 % |
| 17 | (Distance (nm), Depuis 30 minutes) | 2221 | 13.88 % |
| 18 | (Distance (nm), Depuis le dernier classement) | 2221 | 13.88 % |
| 19 | (Distance (nm), Depuis 24 heures) | 2221 | 13.88 % |
| 20 | (DTF (nm), ) | 2221 | 13.88 % |
| 21 | (DTL (nm), ) | 2221 | 13.88 % |
array = df_classements.loc[df_classements['Heure (UTC)'].isna()].index.values
for col in cols[6:]:
print(str(col) + " ==> " + str((array == df_classements.loc[df_classements[col].isna()].index.values).all()))
('Latitude', '') ==> True
('Longitude', '') ==> True
('Cap (°)', 'Depuis 30 minutes') ==> True
('Cap (°)', 'Depuis le dernier classement') ==> True
('Cap (°)', 'Depuis 24 heures') ==> True
('Vitesse (kts)', 'Depuis 30 minutes') ==> True
('Vitesse (kts)', 'Depuis le dernier classement') ==> True
('Vitesse (kts)', 'Depuis 24 heures') ==> True
('VMG (kts)', 'Depuis 30 minutes') ==> True
('VMG (kts)', 'Depuis le dernier classement') ==> True
('VMG (kts)', 'Depuis 24 heures') ==> True
('Distance (nm)', 'Depuis 30 minutes') ==> True
('Distance (nm)', 'Depuis le dernier classement') ==> True
('Distance (nm)', 'Depuis 24 heures') ==> True
('DTF (nm)', '') ==> True
('DTL (nm)', '') ==> True
Les mêmes lignes possèdent des valeurs manquantes dans les colonnes 5 à 21. En d'autres termes, il y a 2221 lignes qui contiennent au moins une valeur manquante.
print("Nombre de lignes avec Rang = 'NL' : "+str(len(df_classements.loc[df_classements['Rang']=='NL'])))
print("Nombre de lignes avec Rang = 'RET' : "+str(len(df_classements.loc[df_classements['Rang']=='RET'])))
Nombre de lignes avec Rang = 'NL' : 5 Nombre de lignes avec Rang = 'RET' : 2216
On remarque que 2216 + 5 = 2221
len(df_classements.loc[(df_classements['Rang']=='NL') & (df_classements['Heure (UTC)'].isna())])
5
len(df_classements.loc[(df_classements['Rang']=='RET') & (df_classements['Heure (UTC)'].isna())])
2216
Conclusion : Les lignes qui contiennent des valeurs NaN sont celles où le rang vaut soit 'NL', soit 'RET'.
➜ On peut négliger ces lignes dans la suite.
cols = df_boats_infos.columns.tolist()
pd.DataFrame({
'Colonne':
cols,
'Nombre de valeurs manquantes':
[df_boats_infos[col].isnull().sum() for col in cols],
'Pourcentage de valeurs manquantes':
[round(((df_boats_infos[col].isnull().sum())*100/(len(df_boats_infos.index))),2).astype(str)+' %' for col in cols]
})
| Colonne | Nombre de valeurs manquantes | Pourcentage de valeurs manquantes | |
|---|---|---|---|
| 0 | Nom du voile | 1 | 2.94 % |
| 1 | Nationalité | 1 | 2.94 % |
| 2 | Numéro du voile | 1 | 2.94 % |
| 3 | Anciens noms du bateau | 10 | 29.41 % |
| 4 | Architecte | 1 | 2.94 % |
| 5 | Chantier | 1 | 2.94 % |
| 6 | Date de lancement | 0 | 0.0 % |
| 7 | Longueur | 1 | 2.94 % |
| 8 | Largeur | 1 | 2.94 % |
| 9 | Tirant d'eau | 1 | 2.94 % |
| 10 | Déplacement (poids) | 3 | 8.82 % |
| 11 | Nombre de dérives | 1 | 2.94 % |
| 12 | Hauteur mât | 1 | 2.94 % |
| 13 | Voile quille | 2 | 5.88 % |
| 14 | Surface de voiles au près | 1 | 2.94 % |
| 15 | Surface de voiles au portant | 1 | 2.94 % |
df_classements = df_classements.dropna()
Maintenant que nous n'avons que des valeurs numériques au niveau du rang, on change le type de cette en int :
df_classements = df_classements.copy()
df_classements['Rang'] = df_classements['Rang'].astype(int)
df_classements
| Rang | Nationalité | Numéro du voile | Nom du skipper | Nom du voile | Heure (UTC) | Latitude | Longitude | Cap (°) | ... | Vitesse (kts) | VMG (kts) | Distance (nm) | DTF (nm) | DTL (nm) | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Depuis 30 minutes | Depuis le dernier classement | ... | Depuis le dernier classement | Depuis 24 heures | Depuis 30 minutes | Depuis le dernier classement | Depuis 24 heures | Depuis 30 minutes | Depuis le dernier classement | Depuis 24 heures | ||||||||||||
| Date du classement | ||||||||||||||||||||||
| 2020-11-08 14:00:00 | 0 | 1 | FRA | 18 | Louis BURTON | Bureau Vallée 2 | 14:30:00 | 46.412778 | -1.846667 | 241.0 | 357.0 | ... | 0.0 | 0.3 | 17.5 | 0.0 | 0.3 | 0.3 | 2788.0 | 6.1 | 24293.9 | 0.0 |
| 1 | 2 | MON | 10 | Boris HERRMANN | Seaexplorer - Yacht Club De Monaco | 14:31:00 | 46.409444 | -1.839444 | 241.0 | 357.0 | ... | 0.0 | 0.3 | 10.9 | 0.0 | 0.2 | 0.4 | 2787.9 | 6.0 | 24294.2 | 0.4 | |
| 2 | 3 | FRA | 08 | Jérémie BEYOU | Charal | 14:30:00 | 46.425278 | -1.844167 | 244.0 | 357.0 | ... | 0.0 | 0.2 | 15.5 | 0.0 | 0.2 | 0.5 | 2788.5 | 5.5 | 24294.3 | 0.5 | |
| 3 | 4 | FRA | 59 | Thomas RUYANT | LinkedOut | 14:30:00 | 46.419722 | -1.835556 | 244.0 | 357.0 | ... | 0.0 | 0.2 | 13.1 | 0.0 | 0.2 | 0.7 | 2788.3 | 5.6 | 24294.5 | 0.6 | |
| 4 | 5 | FRA | 53 | Maxime SOREL | V And B Mayenne | 14:30:00 | 46.416389 | -1.832222 | 246.0 | 357.0 | ... | 0.0 | 0.8 | 10.9 | 0.0 | 0.7 | 0.2 | 2788.1 | 5.8 | 24294.5 | 0.6 | |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2021-01-27 14:00:00 | 20 | 21 | FRA | 71 | Manuel COUSIN | Groupe Sétin | 13:30:00 | -24.011667 | -26.806389 | 347.0 | 355.0 | ... | 13.7 | 12.9 | 7.9 | 13.0 | 12.0 | 4.4 | 41.1 | 310.4 | 4521.7 | 4441.2 |
| 21 | 22 | FRA | 50 | Miranda MERRON | Campagne de France | 13:30:00 | -26.187778 | -26.041944 | 348.0 | 349.0 | ... | 11.8 | 10.5 | 9.9 | 10.8 | 9.8 | 5.4 | 35.5 | 252.0 | 4639.1 | 4558.6 | |
| 22 | 23 | FRA | 83 | Clément GIRAUD | Compagnie du lit - Jiliti | 13:30:00 | -26.001944 | -29.507500 | 7.0 | 1.0 | ... | 10.1 | 9.6 | 10.9 | 9.8 | 7.9 | 5.5 | 30.3 | 230.4 | 4674.6 | 4594.1 | |
| 23 | 24 | FRA | 72 | Alexia BARRIER | TSE - 4myplanet | 13:30:00 | -48.151389 | -53.862500 | 25.0 | 23.0 | ... | 9.1 | 11.4 | 10.0 | 9.0 | 11.4 | 5.0 | 27.3 | 273.7 | 6341.5 | 6260.9 | |
| 24 | 25 | FIN | 222 | Ari HUUSELA | Stark | 13:30:00 | -49.383611 | -52.491389 | 44.0 | 38.0 | ... | 10.8 | 11.5 | 9.2 | 10.7 | 11.5 | 4.7 | 32.5 | 275.8 | 6376.7 | 6296.1 | |
13784 rows × 22 columns
# Correlation matrix
plt.figure(figsize=(13,13))
plt.title('Matrice de corrélation', fontsize=20, pad=20)
sn.heatmap(df_classements.corr(numeric_only=True), annot = True)
plt.xlabel("Colonnes numériques", fontsize=14, color='darkred')
plt.ylabel("Colonnes numériques", fontsize=14, color='darkred')
plt.show()
df1 = df_classements.groupby('Rang').agg({('VMG (kts)', 'Depuis le dernier classement'): 'mean'})
df1 = df1.droplevel(1, axis=1)
df1 = df1.reset_index()
df1 = df1.rename(columns={'VMG (kts)': 'VMG moyenne (kts)'})
df1
| Rang | VMG moyenne (kts) | |
|---|---|---|
| 0 | 1 | 12.860825 |
| 1 | 2 | 12.815670 |
| 2 | 3 | 12.820206 |
| 3 | 4 | 12.709691 |
| 4 | 5 | 12.777320 |
| 5 | 6 | 12.697526 |
| 6 | 7 | 12.711546 |
| 7 | 8 | 12.563299 |
| 8 | 9 | 12.366804 |
| 9 | 10 | 12.321649 |
| 10 | 11 | 12.450928 |
| 11 | 12 | 11.920412 |
| 12 | 13 | 11.702062 |
| 13 | 14 | 11.999175 |
| 14 | 15 | 11.442680 |
| 15 | 16 | 11.398763 |
| 16 | 17 | 11.270309 |
| 17 | 18 | 11.513196 |
| 18 | 19 | 11.521031 |
| 19 | 20 | 11.249897 |
| 20 | 21 | 11.074021 |
| 21 | 22 | 11.006392 |
| 22 | 23 | 10.667423 |
| 23 | 24 | 10.290309 |
| 24 | 25 | 9.880619 |
| 25 | 26 | 8.798333 |
| 26 | 27 | 8.475989 |
| 27 | 28 | 9.142000 |
| 28 | 29 | 7.983436 |
| 29 | 30 | 7.796835 |
| 30 | 31 | 8.146795 |
| 31 | 32 | 8.251471 |
| 32 | 33 | 0.193617 |
print("Corrélation entre le rang et la vitesse utile moyenne : "+str(df1['Rang'].corr(df1['VMG moyenne (kts)'])))
Corrélation entre le rang et la vitesse utile moyenne : -0.8110886438639094
# Régression linéaire
X = np.asarray(df1['Rang'])
Y = np.asarray(df1['VMG moyenne (kts)'])
X = X.reshape(-1,1)
reg = lm.LinearRegression(fit_intercept=True)
reg.fit(X,Y)
Y_pred = reg.predict(X) # Valeurs prédites par le modèle
b0 = reg.intercept_
b1 = reg.coef_[0]
R2 = reg.score(X,Y)
SSE = np.sum((Y - Y_pred)**2) ### Error sum of squares
SSR = np.sum((Y_pred - Y.mean())**2) ### Regression sum of squares
SST = np.sum((Y - Y.mean())**2) ### Total sum of squares
print("intercept = " + str(round(b0,4)))
print("pente = " + str(round(b1,4)))
print("\nSSE = " + str(round(SSE,4)))
print("SSR = " + str(round(SSR,4)))
print("SST = " + str(round(SST,4)))
print("\nR2 = " + str(round(R2*100,2)) + "%")
intercept = 14.3156 pente = -0.2096 SSE = 68.3728 SSR = 131.4687 SST = 199.8415 R2 = 65.79%
# Analyse de la variance
pd.DataFrame(data = {'Source de variation': ['Expliquée', 'Résiduelle', 'Totale'],
'Somme des carrés': [SSR, SSE, SST],
'ddl': [1, len(Y)-2, len(Y)-1],
'Carré moyen': [SSR, SSE/(len(Y)-2), SST/(len(Y)-1)]}).set_index('Source de variation')
| Somme des carrés | ddl | Carré moyen | |
|---|---|---|---|
| Source de variation | |||
| Expliquée | 131.468713 | 1 | 131.468713 |
| Résiduelle | 68.372828 | 31 | 2.205575 |
| Totale | 199.841541 | 32 | 6.245048 |
fig = plt.figure(figsize=(8,6))
plt.plot(X, Y, 'o', label="Observations", color="cornflowerblue")
plt.plot(X, Y_pred, label="Droite de régression", color="darkgoldenrod")
plt.plot(X.mean(), Y.mean(), marker='o', markersize=9, markeredgecolor='brown', markerfacecolor='brown', label="Point moyen")
axes = plt.gca()
x_min, x_max = axes.get_xlim()
y_min, y_max = axes.get_ylim()
xmax = (X.mean()-x_min)/(x_max-x_min)
ymax = (Y.mean()-y_min)/(y_max-y_min)
plt.axhline(y=Y.mean(), xmin=0, xmax=xmax, linestyle='--', color='forestgreen')
plt.axvline(x=X.mean(), ymin=0, ymax=ymax, linestyle='--', color='forestgreen')
plt.text(x_min+0.2, Y.mean()+0.2, str(round(Y.mean(),3)), fontstyle='italic', color='seagreen', fontsize=8)
plt.text(X.mean()+0.2, y_min+0.2, str(round(X.mean(),3)), fontstyle='italic', color='seagreen', fontsize=8)
plt.title("Régression linéaire du rang sur la vitesse utile moyenne")
plt.xlabel("Rang", color='darkred')
plt.ylabel("VMG moyenne", color='darkred')
plt.legend(loc='upper right')
plt.show()
fig = plt.figure(figsize=(8,6))
plt.plot(Y_pred, Y-Y_pred, 'o', color="cornflowerblue")
plt.plot(Y.mean(), 0, marker='o', markersize=9, markeredgecolor='brown', markerfacecolor='brown', label="Point moyen")
plt.axhline(y=0, xmin=0, xmax=1, color='darkgoldenrod', label="Résidu = 0")
axes = plt.gca()
y_min, y_max = axes.get_ylim()
ymax = -y_min / (y_max-y_min)
plt.axvline(x=Y.mean(), ymin=0, ymax=ymax, linestyle='--', color='forestgreen')
plt.text(Y.mean()+0.02, y_min+0.1, str(round(Y.mean(),3)), fontstyle='italic', color='seagreen', fontsize=8)
plt.title("Graphique croisant les valeurs prédites et les résidus")
plt.xlabel("Valeurs prédites", color='darkred')
plt.ylabel("Résidus", color='darkred')
plt.legend(loc='upper right')
plt.show()
# Distance de Cook : Elle mesure l'impact de chaque observation sur l'équation de régression
model = sm.OLS(Y, sm.add_constant(X)).fit()
cooks = model.get_influence().cooks_distance
fig = plt.figure(figsize=(12,6))
plt.bar(list(range(len(df1))) , list(cooks[0]))
plt.xticks(list(range(len(df1))))
plt.title("Distance de Cook pour chaque observation")
plt.xlabel("Numéro de l'observation", color='darkred')
plt.ylabel("Distance de Cook", color='darkred')
plt.show()
La dernière observation (Rang = 33) est un point aberrant.
df2 = df_classements.groupby('Nom du skipper').agg({('Distance (nm)', 'Depuis le dernier classement'): 'sum'})
df2 = df2.droplevel(1, axis=1)
df2 = df2.reset_index()
df2 = df2.rename(columns={'Distance (nm)': 'Distance parcourue (nm)'})
df2['Distance parcourue (nm)'] = df2['Distance parcourue (nm)'].round(2)
df2
| Nom du skipper | Distance parcourue (nm) | |
|---|---|---|
| 0 | Alan ROURA | 26535.3 |
| 1 | Alex THOMSON | 11119.0 |
| 2 | Alexia BARRIER | 23137.2 |
| 3 | Ari HUUSELA | 23749.3 |
| 4 | Armel TRIPON | 29470.7 |
| 5 | Arnaud BOISSIERES | 26505.9 |
| 6 | Benjamin DUTREUX | 29647.9 |
| 7 | Boris HERRMANN | 30508.0 |
| 8 | Charlie DALIN | 31161.1 |
| 9 | Clarisse CREMER | 28086.3 |
| 10 | Clément GIRAUD | 25158.2 |
| 11 | Damien SEGUIN | 29642.5 |
| 12 | Didac COSTA | 26065.0 |
| 13 | Fabrice AMEDEO | 11193.2 |
| 14 | Giancarlo PEDOTE | 30173.9 |
| 15 | Isabelle JOSCHKE | 23675.0 |
| 16 | Jean LE CAM | 29483.6 |
| 17 | Jérémie BEYOU | 28831.4 |
| 18 | Kevin ESCOFFIER | 10450.0 |
| 19 | Kojiro SHIRAISHI | 26848.9 |
| 20 | Louis BURTON | 30753.0 |
| 21 | Manuel COUSIN | 25740.1 |
| 22 | Maxime SOREL | 28927.4 |
| 23 | Miranda MERRON | 24852.2 |
| 24 | Nicolas TROUSSEL | 5274.3 |
| 25 | Pip HARE | 25805.4 |
| 26 | Romain ATTANASIO | 27160.6 |
| 27 | Samantha DAVIES | 11444.8 |
| 28 | Stéphane LE DIRAISON | 26646.3 |
| 29 | Sébastien DESTREMAU | 18715.3 |
| 30 | Sébastien SIMON | 11478.6 |
| 31 | Thomas RUYANT | 30991.2 |
| 32 | Yannick BESTAVEN | 30507.5 |
fig = make_subplots(
rows=2, cols=1,
shared_xaxes=False,
shared_yaxes=False,
vertical_spacing=0.02,
specs=[[{"type": "table"}],
[{"type": "bar"}]],
row_heights=[1,1.2]
)
fig.add_trace(go.Bar(x=df2['Nom du skipper'],
y=df2['Distance parcourue (nm)'],
marker=dict(color=df2['Distance parcourue (nm)'],
colorscale='earth',
showscale=True,
colorbar=dict(len=0.55, x=1.02 ,y=0.27)),
hoverlabel=dict(namelength=0)),
row=2, col=1)
fig.update_xaxes(title="Nom du skipper", title_font=dict(size=20, family='Open Sans', color='#AD2C30'), row=2, col=1)
fig.update_yaxes(title="Distance parcourue (nm)", title_font=dict(size=20, family='Open Sans', color='#AD2C30'), row=2, col=1)
fig.update_layout(height=1200,
title="Distance parcourue par voilier (jusqu'au 27 Janvier 2021 à 14H)",
title_x=0.5,
title_font_family="Times New Roman",
title_font_size=22,
title_font_color="darkred",
font=dict(size=12, color="#150E68")
)
fig.add_trace(go.Table(header=dict(values=["Nom du skipper", "Distance parcourue (nm)"],
font=dict(size=16, color='black'),
align="center",
fill_color='#FBF8E4',
line_color='lightgrey'),
cells=dict(values=[df2[k].tolist() for k in df2.columns],
align = "center",
fill_color='#FFFEF8',
line_color='lightgrey',
height=22)),
row=1, col=1
)
fig.show()
df3 = df_classements[['Nom du skipper', 'Distance (nm)']]
df3 = df3.droplevel(1, axis=0)
df3 = df3.reset_index()
df3 = df3.set_index(['Nom du skipper', 'Date du classement']).sort_values(['Nom du skipper', 'Date du classement'])
df3 = df3.droplevel(0, axis=1)
df3 = df3.drop(['Depuis 30 minutes', 'Depuis 24 heures'], axis=1)
df3 = df3.rename(columns={'Depuis le dernier classement': 'Distance parcourue (nm)'})
df3 = df3.groupby('Nom du skipper').cumsum()
df3
| Distance parcourue (nm) | ||
|---|---|---|
| Nom du skipper | Date du classement | |
| Alan ROURA | 2020-11-08 14:00:00 | 2789.2 |
| 2020-11-08 15:00:00 | 2812.2 | |
| 2020-11-08 17:00:00 | 2841.1 | |
| 2020-11-08 21:00:00 | 2897.4 | |
| 2020-11-09 04:00:00 | 2971.8 | |
| ... | ... | ... |
| Yannick BESTAVEN | 2021-01-26 21:00:00 | 30189.3 |
| 2021-01-27 04:00:00 | 30326.2 | |
| 2021-01-27 08:00:00 | 30403.3 | |
| 2021-01-27 11:00:00 | 30453.1 | |
| 2021-01-27 14:00:00 | 30507.5 |
13784 rows × 1 columns
fig = make_subplots(rows=1, cols=1)
for i in df3.index.levels[0]:
df_dist = df3.loc[i,:].reset_index()
fig.add_trace(go.Scatter(x=df_dist['Date du classement'],
y=df_dist['Distance parcourue (nm)'],
mode='lines',
name=i))
fig.update_xaxes(title="Date du classement",
title_font=dict(size=20, family='Open Sans', color='#546E09'),
row=1, col=1,
rangeslider_visible=True,
nticks=20)
fig.update_yaxes(title="Distance parcourue (nm)",
title_font=dict(size=20, family='Open Sans', color='#546E09'),
row=1, col=1)
fig.update_layout(height=900,
title="Distance parcourue par voilier en fonction de la date du classement",
title_x=0.5,
title_font_family="Times New Roman",
title_font_size=22,
title_font_color="darkred",
font=dict(size=14, color="RebeccaPurple"))
fig.show()
df4 = df_classements[['Nom du skipper', 'DTF (nm)']]
df4 = df4.droplevel(1, axis=0)
df4 = df4.reset_index()
df4 = df4.set_index(['Nom du skipper', 'Date du classement']).sort_values(['Nom du skipper', 'Date du classement'])
df4 = df4.rename(columns={'DTF (nm)': 'Distance restante (nm)'})
df4
| Distance restante (nm) | ||
|---|---|---|
| Nom du skipper | Date du classement | |
| Alan ROURA | 2020-11-08 14:00:00 | 24295.3 |
| 2020-11-08 15:00:00 | 24273.5 | |
| 2020-11-08 17:00:00 | 24250.0 | |
| 2020-11-08 21:00:00 | 24208.9 | |
| 2020-11-09 04:00:00 | 24154.1 | |
| ... | ... | ... |
| Yannick BESTAVEN | 2021-01-26 21:00:00 | 578.2 |
| 2021-01-27 04:00:00 | 443.9 | |
| 2021-01-27 08:00:00 | 367.8 | |
| 2021-01-27 11:00:00 | 318.4 | |
| 2021-01-27 14:00:00 | 264.3 |
13784 rows × 1 columns
fig = make_subplots(rows=1, cols=1)
for i in df4.index.levels[0]:
df_dist = df4.loc[i,:].reset_index()
fig.add_trace(go.Scatter(x=df_dist['Date du classement'],
y=df_dist['Distance restante (nm)'],
mode='lines',
name=i))
fig.update_xaxes(title="Date du classement",
title_font=dict(size=20, family='Open Sans', color='#546E09'),
row=1, col=1,
rangeslider_visible=True,
nticks=20)
fig.update_yaxes(title="Distance restante (nm)",
title_font=dict(size=20, family='Open Sans', color='#546E09'),
row=1, col=1)
fig.update_layout(height=900,
title="Distance restante par voilier en fonction de la date du classement",
title_x=0.5,
title_font_family="Times New Roman",
title_font_size=22,
title_font_color="darkred",
font=dict(size=14, color="RebeccaPurple"))
fig.show()
df5 = df_classements.groupby('Nom du skipper').agg({('VMG (kts)', 'Depuis le dernier classement'): 'mean'})
df5 = df5.droplevel(1, axis=1)
df5 = df5.reset_index()
df5 = df5.rename(columns={'VMG (kts)': 'VMG moyenne (kts)'})
df5 = pd.merge(df5,
df_classements.loc[datetime.datetime(2021,1,27,14,0,0),['Nom du skipper', 'Rang']].droplevel(1, axis=1),
left_on='Nom du skipper',
right_on='Nom du skipper',
how='inner')
df5 = df5.rename(columns={'Rang': 'Rang au classement du 27 janvier 2021 à 14h'})
df5 = pd.merge(df5,
df_boats_infos.reset_index()[['Nom du skipper', 'Nombre de dérives', 'Voile quille',
'Largeur', 'Déplacement (poids)', 'Hauteur mât',
'Surface de voiles au près', 'Surface de voiles au portant']],
left_on='Nom du skipper',
right_on='Nom du skipper',
how='inner')
df5.sort_values('Rang au classement du 27 janvier 2021 à 14h').set_index('Nom du skipper')
| VMG moyenne (kts) | Rang au classement du 27 janvier 2021 à 14h | Nombre de dérives | Voile quille | Largeur | Déplacement (poids) | Hauteur mât | Surface de voiles au près | Surface de voiles au portant | |
|---|---|---|---|---|---|---|---|---|---|
| Nom du skipper | |||||||||
| Charlie DALIN | 12.805165 | 1 | foils | acier | 5.85 | 8.0 | 29.0 | 350.0 | 560.0 |
| Louis BURTON | 12.836701 | 2 | foils | acier | 5.80 | 7.6 | 28.0 | 300.0 | 600.0 |
| Boris HERRMANN | 12.774433 | 3 | foils | acier | 5.70 | 7.6 | 29.0 | 290.0 | 490.0 |
| Thomas RUYANT | 12.685773 | 4 | foils | acier forgé | 5.85 | 8.0 | 29.0 | 350.0 | 560.0 |
| Yannick BESTAVEN | 12.713814 | 5 | foils | acier mécano soudé | 5.80 | 8.0 | 29.0 | 310.0 | 550.0 |
| Damien SEGUIN | 12.646392 | 6 | 2 | acier forgé | 5.85 | 8.5 | 29.0 | 350.0 | 610.0 |
| Giancarlo PEDOTE | 12.632371 | 7 | foils | acier forgé | 5.80 | 8.0 | 29.0 | 300.0 | 600.0 |
| Jean LE CAM | 12.491340 | 8 | 2 | acier forgé | 5.90 | 8.0 | 28.0 | 300.0 | 620.0 |
| Benjamin DUTREUX | 12.451546 | 9 | 2 | acier forgé | 5.85 | 8.0 | 29.0 | 300.0 | 700.0 |
| Maxime SOREL | 12.287423 | 10 | 2 | acier | 5.50 | 7.7 | 29.0 | 365.0 | 700.0 |
| Armel TRIPON | 12.247010 | 11 | foils | acier forgé | 5.50 | 7.8 | 28.0 | 270.0 | 535.0 |
| Clarisse CREMER | 11.941237 | 12 | 2 | acier forgé | 5.70 | 7.7 | 29.0 | 340.0 | 570.0 |
| Romain ATTANASIO | 11.424330 | 13 | 2 | acier forgé | 5.80 | 9.0 | 28.0 | 280.0 | 560.0 |
| Jérémie BEYOU | 11.335464 | 14 | foils | acier | 5.85 | 8.0 | 29.0 | 320.0 | 600.0 |
| Kojiro SHIRAISHI | 10.968866 | 17 | foils | acier forgé | 5.85 | 8.0 | 29.0 | 320.0 | 580.0 |
| Stéphane LE DIRAISON | 10.988247 | 18 | foils | monotype | 5.84 | 8.5 | 28.0 | 300.0 | 550.0 |
| Pip HARE | 10.887835 | 19 | 2 | carbone | 5.70 | 9.0 | 29.0 | 300.0 | 580.0 |
| Didac COSTA | 10.798969 | 20 | 2 | acier | 5.30 | 8.9 | 26.0 | 240.0 | 470.0 |
| Manuel COUSIN | 10.294021 | 21 | 2 asymétriques | basculante sur vérin hydraulique | 5.80 | 9.0 | 28.5 | 270.0 | 560.0 |
| Miranda MERRON | 10.223505 | 22 | 2 | carbone | 5.50 | 8.5 | 28.0 | 330.0 | 600.0 |
| Clément GIRAUD | 10.278144 | 23 | 2 | acier forgé | 5.75 | 8.5 | 29.0 | 300.0 | 620.0 |
| Alexia BARRIER | 9.504124 | 24 | 2 | acier | 5.54 | 9.0 | 29.0 | 260.0 | 580.0 |
| Ari HUUSELA | 9.370722 | 25 | 2 | acier | 5.80 | 8.5 | 28.0 | 270.0 | 580.0 |
On remarque que les foils sont présents au niveau des 5 premiers rang.
# Correlation matrix
plt.figure(figsize=(13,13))
plt.title('Matrice de corrélation', fontsize=20, pad=20)
sn.heatmap(df5.corr(numeric_only=True), annot = True)
plt.show()
On remarque aussi que le déplacement impacte considérablement le classement (corrélation = 0.7).
On utilisera pour cette partie les coordonnées géorgraphiques (Latitude, Longitude) ainsi que la date du classement, et ceci pour chaque voilier.
df6 = df_classements[['Nom du skipper', 'Latitude', 'Longitude']]
df6 = df6.droplevel(1, axis=0)
df6 = df6.reset_index()
df6 = df6.set_index(['Nom du skipper','Date du classement']).sort_values(['Nom du skipper','Date du classement'])
df6
| Latitude | Longitude | ||
|---|---|---|---|
| Nom du skipper | Date du classement | ||
| Alan ROURA | 2020-11-08 14:00:00 | 46.436667 | -1.826667 |
| 2020-11-08 15:00:00 | 46.358889 | -2.370278 | |
| 2020-11-08 17:00:00 | 46.391111 | -3.066111 | |
| 2020-11-08 21:00:00 | 46.470000 | -4.426389 | |
| 2020-11-09 04:00:00 | 46.339444 | -6.205833 | |
| ... | ... | ... | ... |
| Yannick BESTAVEN | 2021-01-26 21:00:00 | 47.595833 | -15.841667 |
| 2021-01-27 04:00:00 | 47.878889 | -12.467222 | |
| 2021-01-27 08:00:00 | 47.893611 | -10.553056 | |
| 2021-01-27 11:00:00 | 47.850278 | -9.317500 | |
| 2021-01-27 14:00:00 | 47.727222 | -7.989167 |
13784 rows × 2 columns
fig = make_subplots(rows=1, cols=1)
for i in df6.index.levels[0]:
df_dist = df6.loc[i,:].reset_index()
fig.add_trace(go.Scattergeo(lat=df_dist['Latitude'],
lon=df_dist['Longitude'],
mode='markers+lines',
marker_size=2,
line_width=1,
name=i,
hovertext=df_dist['Date du classement']))
fig.update_geos(visible=False,
resolution=110,
showcountries=True,
countrycolor="slategrey",
lataxis_showgrid=True,
lataxis_gridcolor="lightgrey",
lonaxis_showgrid=True,
lonaxis_gridcolor="lightgrey",
showocean=True,
oceancolor="lightcyan",
showland=True,
landcolor="moccasin")
fig.update_layout(margin={"t": 0, "b": 0, "l": 0, "r": 0}, width=985, height=660, legend=dict(orientation="h"))
fig.show()